From 10f287a983b32f999302a05483efb27a99aa5c9f Mon Sep 17 00:00:00 2001 From: Victor Ghita Date: Tue, 10 Feb 2026 14:32:25 +0200 Subject: [PATCH 01/15] add update_schema --- crates/iceberg/src/transaction/mod.rs | 7 + .../iceberg/src/transaction/update_schema.rs | 1193 +++++++++++++++++ .../integration_tests/tests/update_schema.rs | 409 ++++++ 3 files changed, 1609 insertions(+) create mode 100644 crates/iceberg/src/transaction/update_schema.rs create mode 100644 crates/integration_tests/tests/update_schema.rs diff --git a/crates/iceberg/src/transaction/mod.rs b/crates/iceberg/src/transaction/mod.rs index 074c7fefe4..d9ffa38881 100644 --- a/crates/iceberg/src/transaction/mod.rs +++ b/crates/iceberg/src/transaction/mod.rs @@ -58,6 +58,7 @@ mod snapshot; mod sort_order; mod update_location; mod update_properties; +mod update_schema; mod update_statistics; mod upgrade_format_version; @@ -74,6 +75,7 @@ use crate::transaction::append::FastAppendAction; use crate::transaction::sort_order::ReplaceSortOrderAction; use crate::transaction::update_location::UpdateLocationAction; use crate::transaction::update_properties::UpdatePropertiesAction; +use crate::transaction::update_schema::UpdateSchemaAction; use crate::transaction::update_statistics::UpdateStatisticsAction; use crate::transaction::upgrade_format_version::UpgradeFormatVersionAction; use crate::{Catalog, TableCommit, TableRequirement, TableUpdate}; @@ -136,6 +138,11 @@ impl Transaction { UpdatePropertiesAction::new() } + /// Creates an update schema action. + pub fn update_schema(&self) -> UpdateSchemaAction { + UpdateSchemaAction::new() + } + /// Creates a fast append action. 
pub fn fast_append(&self) -> FastAppendAction { FastAppendAction::new() diff --git a/crates/iceberg/src/transaction/update_schema.rs b/crates/iceberg/src/transaction/update_schema.rs new file mode 100644 index 0000000000..3caa275be1 --- /dev/null +++ b/crates/iceberg/src/transaction/update_schema.rs @@ -0,0 +1,1193 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; + +use async_trait::async_trait; + +use crate::spec::{ + ListType, Literal, MapType, NestedField, NestedFieldRef, Schema, StructType, Type, +}; +use crate::table::Table; +use crate::transaction::action::{ActionCommit, TransactionAction}; +use crate::{Error, ErrorKind, Result, TableRequirement, TableUpdate}; + +/// Sentinel parent ID representing the table root (top-level columns). +const TABLE_ROOT_ID: i32 = -1; + +/// A pending column addition, recording the parent path and the field to add. +struct PendingAdd { + /// `None` means a root-level addition; `Some("person")` or `Some("person.address")` + /// identifies the nested struct to add the column to. + parent: Option, + /// The field to add. Uses placeholder ID `0` which is auto-assigned at commit time. 
field: NestedFieldRef,
}

/// Schema evolution API modeled after the Java `SchemaUpdate` implementation.
///
/// This action accumulates schema modifications (column additions and deletions)
/// via builder methods. At commit time, it validates all operations against the
/// current table schema, auto-assigns field IDs from `table.metadata().last_column_id()`,
/// builds a new schema, and emits `AddSchema` + `SetCurrentSchema` updates with a
/// `CurrentSchemaIdMatch` requirement.
///
/// Nothing is validated while the builder methods are called; every check
/// (unknown parent, name conflicts, dotted names, missing initial defaults,
/// deleting identifier fields) is deferred to commit time.
///
/// # Example
///
/// ```ignore
/// let tx = Transaction::new(&table);
/// let action = tx.update_schema()
///     .add_column("new_col", Type::Primitive(PrimitiveType::Int))
///     .add_column_to("person", "email", Type::Primitive(PrimitiveType::String))
///     .delete_column("old_col");
/// let tx = action.apply(tx).unwrap();
/// let table = tx.commit(&catalog).await.unwrap();
/// ```
pub struct UpdateSchemaAction {
    /// Pending additions, kept in the order the caller registered them.
    additions: Vec<PendingAdd>,
    /// Names of the columns to delete, resolved against the current schema at commit time.
    deletes: Vec<String>,
    /// When `true` (the default), placeholder field IDs are replaced with fresh ones at commit.
    auto_assign_ids: bool,
}

impl UpdateSchemaAction {
    /// Creates a new empty `UpdateSchemaAction` with automatic ID assignment enabled.
    pub(crate) fn new() -> Self {
        Self {
            additions: Vec::new(),
            deletes: Vec::new(),
            auto_assign_ids: true,
        }
    }

    // --- Root-level additions ---

    /// Add a `NestedFieldRef` column to the table root.
    ///
    /// The field is taken as given: its name must not contain `.`, and a required
    /// field must carry an `initial_default`, otherwise the commit fails. Its ID is
    /// replaced at commit time unless
    /// [`disable_id_auto_assignment`](Self::disable_id_auto_assignment) was called.
    pub fn add_field(self, field: NestedFieldRef) -> Self {
        self.add_field_internal(None, field)
    }

    /// Add an optional column to the table root.
    ///
    /// The field ID is a placeholder (`0`) and will be auto-assigned at commit time.
    pub fn add_column(self, name: impl ToString, field_type: Type) -> Self {
        self.add_field(Arc::new(NestedField::optional(0, name, field_type)))
    }

    /// Add an optional column with a doc string to the table root.
    ///
    /// The field ID is a placeholder (`0`) and will be auto-assigned at commit time.
    pub fn add_column_with_doc(
        self,
        name: impl ToString,
        field_type: Type,
        doc: impl ToString,
    ) -> Self {
        self.add_field(Arc::new(
            NestedField::optional(0, name, field_type).with_doc(doc),
        ))
    }

    /// Add a required column to the table root.
    ///
    /// An `initial_default` value is required per the Iceberg spec: it is used to populate
    /// this field for all records that were written before the field was added.
    /// The field ID is a placeholder (`0`) and will be auto-assigned at commit time.
    pub fn add_required_column(
        self,
        name: impl ToString,
        field_type: Type,
        initial_default: Literal,
    ) -> Self {
        self.add_field(Arc::new(
            NestedField::required(0, name, field_type).with_initial_default(initial_default),
        ))
    }

    /// Add a required column with a doc string to the table root.
    ///
    /// An `initial_default` value is required per the Iceberg spec: it is used to populate
    /// this field for all records that were written before the field was added.
    /// The field ID is a placeholder (`0`) and will be auto-assigned at commit time.
    pub fn add_required_column_with_doc(
        self,
        name: impl ToString,
        field_type: Type,
        initial_default: Literal,
        doc: impl ToString,
    ) -> Self {
        self.add_field(Arc::new(
            NestedField::required(0, name, field_type)
                .with_initial_default(initial_default)
                .with_doc(doc),
        ))
    }

    // --- Nested additions ---

    /// Add a `NestedFieldRef` column under a parent struct identified by name.
    ///
    /// If the parent is a map, the column is added to the map value's struct.
    /// If the parent is a list, the column is added to the list element's struct.
    /// The parent is only resolved at commit time; an unknown parent, or one that
    /// does not lead to a struct, makes the commit fail.
    pub fn add_field_to(self, parent: impl ToString, field: NestedFieldRef) -> Self {
        self.add_field_internal(Some(parent.to_string()), field)
    }

    /// Add an optional column under a parent struct identified by name.
    ///
    /// The `parent` can be a dotted path (e.g. `"person"` or `"person.address"`).
    /// If the parent is a map, the column is added to the map value's struct.
    /// If the parent is a list, the column is added to the list element's struct.
    /// The field ID is a placeholder (`0`) and will be auto-assigned at commit time.
    pub fn add_column_to(
        self,
        parent: impl ToString,
        name: impl ToString,
        field_type: Type,
    ) -> Self {
        self.add_field_to(parent, Arc::new(NestedField::optional(0, name, field_type)))
    }

    /// Add an optional column with a doc string under a parent struct.
    ///
    /// See [`add_column_to`](Self::add_column_to) for parent path details.
    pub fn add_column_to_with_doc(
        self,
        parent: impl ToString,
        name: impl ToString,
        field_type: Type,
        doc: impl ToString,
    ) -> Self {
        self.add_field_to(
            parent,
            Arc::new(NestedField::optional(0, name, field_type).with_doc(doc)),
        )
    }

    /// Add a required column under a parent struct.
    ///
    /// See [`add_column_to`](Self::add_column_to) for parent path details.
    /// An `initial_default` value is required per the Iceberg spec.
    pub fn add_required_column_to(
        self,
        parent: impl ToString,
        name: impl ToString,
        field_type: Type,
        initial_default: Literal,
    ) -> Self {
        self.add_field_to(
            parent,
            Arc::new(
                NestedField::required(0, name, field_type).with_initial_default(initial_default),
            ),
        )
    }

    /// Add a required column with a doc string under a parent struct.
    ///
    /// See [`add_column_to`](Self::add_column_to) for parent path details.
    /// An `initial_default` value is required per the Iceberg spec.
    pub fn add_required_column_to_with_doc(
        self,
        parent: impl ToString,
        name: impl ToString,
        field_type: Type,
        initial_default: Literal,
        doc: impl ToString,
    ) -> Self {
        self.add_field_to(
            parent,
            Arc::new(
                NestedField::required(0, name, field_type)
                    .with_initial_default(initial_default)
                    .with_doc(doc),
            ),
        )
    }

    // --- Other builder methods ---

    /// Record a column deletion by name.
    ///
    /// At commit time, the column must exist in the current schema and must not
    /// be one of the schema's identifier fields.
    pub fn delete_column(mut self, name: impl ToString) -> Self {
        self.deletes.push(name.to_string());
        self
    }

    /// Disable automatic field ID assignment. When disabled, the placeholder IDs
    /// provided in builder methods are used as-is.
    ///
    /// The convenience builders (`add_column`, `add_required_column`, ...) all use
    /// the placeholder ID `0`, so with auto-assignment disabled callers are expected
    /// to supply their own IDs through [`add_field`](Self::add_field) /
    /// [`add_field_to`](Self::add_field_to).
    pub fn disable_id_auto_assignment(mut self) -> Self {
        self.auto_assign_ids = false;
        self
    }

    // --- Internal helpers ---

    /// Queue an addition; `parent == None` targets the table root.
    fn add_field_internal(mut self, parent: Option<String>, field: NestedFieldRef) -> Self {
        self.additions.push(PendingAdd { parent, field });
        self
    }
}

// ---------------------------------------------------------------------------
// ID assignment helpers
// ---------------------------------------------------------------------------

/// Recursively assign fresh field IDs to a `NestedField` and all its nested sub-fields.
///
/// This follows the same recursive pattern as `ReassignFieldIds::reassign_ids_visit_type`
/// from `crate::spec::schema::id_reassigner`, but operates on new fields with placeholder
/// IDs rather than reassigning an existing schema. `ReassignFieldIds` cannot be used
/// directly here because it rejects duplicate old IDs (all new fields share placeholder
/// ID `0`).
///
/// `next_id` holds the highest ID handed out so far. It is incremented *before* use,
/// so the field itself receives `*next_id + 1` and its descendants receive the IDs
/// that follow (pre-order). On return `next_id` is the last ID that was assigned.
fn assign_fresh_ids(field: &NestedField, next_id: &mut i32) -> NestedFieldRef {
    // Claim this field's ID first so that a parent always has a smaller ID than its children.
    *next_id += 1;
    let new_id = *next_id;
    let new_type = assign_fresh_ids_to_type(&field.field_type, next_id);

    Arc::new(NestedField {
        id: new_id,
        name: field.name.clone(),
        required: field.required,
        field_type: Box::new(new_type),
        doc: field.doc.clone(),
        initial_default: field.initial_default.clone(),
        write_default: field.write_default.clone(),
    })
}

/// Recursively assign fresh field IDs to all nested fields within a `Type`.
///
/// Primitive types carry no field IDs and are returned as a plain clone. Struct
/// fields, list elements, and map keys/values are each passed through
/// `assign_fresh_ids`, in that declaration order (map key before map value).
+fn assign_fresh_ids_to_type(field_type: &Type, next_id: &mut i32) -> Type { + match field_type { + Type::Primitive(_) => field_type.clone(), + Type::Struct(struct_type) => { + let new_fields: Vec = struct_type + .fields() + .iter() + .map(|f| assign_fresh_ids(f, next_id)) + .collect(); + Type::Struct(StructType::new(new_fields)) + } + Type::List(list_type) => { + let new_element = assign_fresh_ids(&list_type.element_field, next_id); + Type::List(ListType { + element_field: new_element, + }) + } + Type::Map(map_type) => { + let new_key = assign_fresh_ids(&map_type.key_field, next_id); + let new_value = assign_fresh_ids(&map_type.value_field, next_id); + Type::Map(MapType { + key_field: new_key, + value_field: new_value, + }) + } + } +} + +// --------------------------------------------------------------------------- +// Parent path resolution +// --------------------------------------------------------------------------- + +/// Resolve a parent path to the target struct's parent field ID and a reference +/// to its `StructType`. +/// +/// If the parent is a map, navigates to the value field. If a list, navigates to +/// the element field. The final target must be a struct type. 
+fn resolve_parent_target<'a>( + base_schema: &'a Schema, + parent: &str, +) -> Result<(i32, &'a StructType)> { + base_schema + .field_by_name(parent) + .ok_or_else(|| { + Error::new( + ErrorKind::PreconditionFailed, + format!("Cannot add column: parent '{parent}' not found"), + ) + }) + .and_then(|parent_field| match parent_field.field_type.as_ref() { + Type::Struct(s) => Ok((parent_field.id, s)), + Type::Map(m) => match m.value_field.field_type.as_ref() { + Type::Struct(s) => Ok((m.value_field.id, s)), + _ => Err(Error::new( + ErrorKind::PreconditionFailed, + format!("Cannot add column: map value of '{parent}' is not a struct"), + )), + }, + Type::List(l) => match l.element_field.field_type.as_ref() { + Type::Struct(s) => Ok((l.element_field.id, s)), + _ => Err(Error::new( + ErrorKind::PreconditionFailed, + format!("Cannot add column: list element of '{parent}' is not a struct"), + )), + }, + _ => Err(Error::new( + ErrorKind::PreconditionFailed, + format!("Cannot add column: parent '{parent}' is not a struct, map, or list"), + )), + }) +} + +// --------------------------------------------------------------------------- +// Schema tree rebuild +// --------------------------------------------------------------------------- + +/// Rebuild a slice of fields, applying deletions and additions at every level, +/// plus any root-level additions keyed by `TABLE_ROOT_ID`. +fn rebuild_fields( + fields: &[NestedFieldRef], + adds: &HashMap>, + delete_ids: &HashSet, + root_id: i32, +) -> Vec { + fields + .iter() + .filter(|f| !delete_ids.contains(&f.id)) + .map(|f| rebuild_field(f, adds, delete_ids)) + .chain(adds.get(&root_id).into_iter().flatten().cloned()) + .collect() +} + +/// Recursively rebuild a single field. If the field (or any descendant) is a struct +/// that has pending additions, those additions are appended to the struct's fields. +/// Fields whose IDs appear in `delete_ids` are filtered out at every struct level. 
+fn rebuild_field( + field: &NestedFieldRef, + adds: &HashMap>, + delete_ids: &HashSet, +) -> NestedFieldRef { + match field.field_type.as_ref() { + Type::Primitive(_) => field.clone(), + Type::Struct(s) => { + let new_fields = rebuild_fields(s.fields(), adds, delete_ids, field.id); + Arc::new(NestedField { + id: field.id, + name: field.name.clone(), + required: field.required, + field_type: Box::new(Type::Struct(StructType::new(new_fields))), + doc: field.doc.clone(), + initial_default: field.initial_default.clone(), + write_default: field.write_default.clone(), + }) + } + Type::List(l) => { + let new_element = rebuild_field(&l.element_field, adds, delete_ids); + Arc::new(NestedField { + id: field.id, + name: field.name.clone(), + required: field.required, + field_type: Box::new(Type::List(ListType { + element_field: new_element, + })), + doc: field.doc.clone(), + initial_default: field.initial_default.clone(), + write_default: field.write_default.clone(), + }) + } + Type::Map(m) => { + let new_key = rebuild_field(&m.key_field, adds, delete_ids); + let new_value = rebuild_field(&m.value_field, adds, delete_ids); + Arc::new(NestedField { + id: field.id, + name: field.name.clone(), + required: field.required, + field_type: Box::new(Type::Map(MapType { + key_field: new_key, + value_field: new_value, + })), + doc: field.doc.clone(), + initial_default: field.initial_default.clone(), + write_default: field.write_default.clone(), + }) + } + } +} + +// --------------------------------------------------------------------------- +// TransactionAction implementation +// --------------------------------------------------------------------------- + +#[async_trait] +impl TransactionAction for UpdateSchemaAction { + async fn commit(self: Arc, table: &Table) -> Result { + let base_schema = table.metadata().current_schema(); + let mut last_column_id = table.metadata().last_column_id(); + + // --- 1. 
Validate deletes --- + let delete_ids = self + .deletes + .iter() + .map(|name: &String| { + base_schema + .field_by_name(name) + .ok_or_else(|| { + Error::new( + ErrorKind::PreconditionFailed, + format!("Cannot delete missing column: {name}"), + ) + }) + .and_then(|field| { + match base_schema + .identifier_field_ids() + .find(|id| *id == field.id) + { + Some(_) => Err(Error::new( + ErrorKind::PreconditionFailed, + format!("Cannot delete identifier field: {name}"), + )), + None => Ok(field.id), + } + }) + }) + .collect::>>()?; + + // --- 2. Resolve parents, validate additions, assign IDs, and group by parent ID --- + // We assign IDs inline (before grouping) to preserve the caller's insertion order, + // since HashMap iteration order is non-deterministic. + let mut additions_by_parent: HashMap> = HashMap::new(); + + for pending in &self.additions { + // Check that name does not contain ".". + if pending.field.name.contains('.') { + return Err(Error::new( + ErrorKind::PreconditionFailed, + format!( + "Cannot add column with ambiguous name: {}. Use the `add_column_to` method to add a column to a nested struct.", + pending.field.name + ), + )); + } + + // Required columns without an initial default need allow_incompatible_changes. + if pending.field.required && pending.field.initial_default.is_none() { + return Err(Error::new( + ErrorKind::PreconditionFailed, + format!( + "Incompatible change: cannot add required column without an initial default: {}", + pending.field.name + ), + )); + } + + let parent_id = match &pending.parent { + None => { + // Root-level: check name conflict against root-level fields. 
+ if let Some(existing) = base_schema.field_by_name(&pending.field.name) + && !delete_ids.contains(&existing.id) + { + return Err(Error::new( + ErrorKind::PreconditionFailed, + format!( + "Cannot add column, name already exists: {}", + pending.field.name + ), + )); + } + TABLE_ROOT_ID + } + Some(parent_path) => { + // Nested: resolve parent, check name conflict within parent struct. + let (parent_id, parent_struct) = + resolve_parent_target(base_schema, parent_path)?; + + if parent_struct + .fields() + .iter() + .any(|f| f.name == pending.field.name && !delete_ids.contains(&f.id)) + { + return Err(Error::new( + ErrorKind::PreconditionFailed, + format!( + "Cannot add column, name already exists in '{}': {}", + parent_path, pending.field.name + ), + )); + } + + parent_id + } + }; + + // Assign fresh IDs immediately, preserving insertion order. + let field = if self.auto_assign_ids { + assign_fresh_ids(&pending.field, &mut last_column_id) + } else { + pending.field.clone() + }; + + additions_by_parent + .entry(parent_id) + .or_default() + .push(field); + } + + // --- 4. Rebuild the schema tree with additions and deletions --- + let new_fields = rebuild_fields( + base_schema.as_struct().fields(), + &additions_by_parent, + &delete_ids, + TABLE_ROOT_ID, + ); + + // --- 5. 
Build the new schema --- + let schema = Schema::builder() + .with_fields(new_fields) + .with_identifier_field_ids(base_schema.identifier_field_ids()) + .build()?; + + let updates = vec![ + TableUpdate::AddSchema { schema }, + TableUpdate::SetCurrentSchema { schema_id: -1 }, + ]; + + let requirements = vec![TableRequirement::CurrentSchemaIdMatch { + current_schema_id: base_schema.schema_id(), + }]; + + Ok(ActionCommit::new(updates, requirements)) + } +} + +#[cfg(test)] +mod tests { + use std::io::BufReader; + use std::sync::Arc; + + use as_any::Downcast; + + use crate::spec::{Literal, NestedField, PrimitiveType, StructType, TableMetadata, Type}; + use crate::table::Table; + use crate::transaction::Transaction; + use crate::transaction::action::{ApplyTransactionAction, TransactionAction}; + use crate::transaction::tests::make_v2_table; + use crate::transaction::update_schema::UpdateSchemaAction; + use crate::{ErrorKind, TableIdent, TableRequirement, TableUpdate}; + + // The V2 test table has: + // last_column_id: 3 + // current schema (id=1): x(1, req, long), y(2, req, long), z(3, req, long) + // identifier_field_ids: [1, 2] + + /// Build a V2 test table that includes nested types: + /// + /// last_column_id: 14 + /// current schema (id=0): + /// x(1, req, long) -- identifier + /// y(2, req, long) -- identifier + /// z(3, req, long) + /// person(4, opt, struct) + /// name(5, opt, string) + /// age(6, req, int) + /// tags(7, opt, list) + /// element(8, req, struct) + /// key(9, opt, string) + /// value(10, opt, string) + /// props(11, opt, map) + /// key(12, req, string) + /// value(13, req, struct) + /// data(14, opt, string) + fn make_v2_table_with_nested() -> Table { + let json = r#"{ + "format-version": 2, + "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c2", + "location": "s3://bucket/test/location", + "last-sequence-number": 0, + "last-updated-ms": 1602638573590, + "last-column-id": 14, + "current-schema-id": 0, + "schemas": [ + { + "type": "struct", + 
"schema-id": 0, + "identifier-field-ids": [1, 2], + "fields": [ + {"id": 1, "name": "x", "required": true, "type": "long"}, + {"id": 2, "name": "y", "required": true, "type": "long"}, + {"id": 3, "name": "z", "required": true, "type": "long"}, + {"id": 4, "name": "person", "required": false, "type": { + "type": "struct", + "fields": [ + {"id": 5, "name": "name", "required": false, "type": "string"}, + {"id": 6, "name": "age", "required": true, "type": "int"} + ] + }}, + {"id": 7, "name": "tags", "required": false, "type": { + "type": "list", + "element-id": 8, + "element": { + "type": "struct", + "fields": [ + {"id": 9, "name": "key", "required": false, "type": "string"}, + {"id": 10, "name": "value", "required": false, "type": "string"} + ] + }, + "element-required": true + }}, + {"id": 11, "name": "props", "required": false, "type": { + "type": "map", + "key-id": 12, + "key": "string", + "value-id": 13, + "value": { + "type": "struct", + "fields": [ + {"id": 14, "name": "data", "required": false, "type": "string"} + ] + }, + "value-required": true + }} + ] + } + ], + "default-spec-id": 0, + "partition-specs": [ + {"spec-id": 0, "fields": []} + ], + "last-partition-id": 999, + "default-sort-order-id": 0, + "sort-orders": [ + {"order-id": 0, "fields": []} + ], + "properties": {}, + "current-snapshot-id": -1, + "snapshots": [] + }"#; + + let reader = BufReader::new(json.as_bytes()); + let metadata = serde_json::from_reader::<_, TableMetadata>(reader).unwrap(); + + Table::builder() + .metadata(metadata) + .metadata_location("s3://bucket/test/location/metadata/v1.json".to_string()) + .identifier(TableIdent::from_strs(["ns1", "test1"]).unwrap()) + .file_io(crate::io::FileIOBuilder::new("memory").build().unwrap()) + .build() + .unwrap() + } + + // ----------------------------------------------------------------------- + // Existing root-level tests + // ----------------------------------------------------------------------- + + #[tokio::test] + async fn 
test_add_column() { + let table = make_v2_table(); + let tx = Transaction::new(&table); + + let action = tx + .update_schema() + .add_column("new_col", Type::Primitive(PrimitiveType::Int)); + + let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); + let updates = action_commit.take_updates(); + let requirements = action_commit.take_requirements(); + + assert_eq!(updates.len(), 2); + + // Extract the new schema from the AddSchema update. + let new_schema = match &updates[0] { + TableUpdate::AddSchema { schema } => schema, + other => panic!("expected AddSchema, got {:?}", other), + }; + + // The new field should have ID = last_column_id + 1 = 4. + let new_field = new_schema + .field_by_name("new_col") + .expect("new_col should exist"); + assert_eq!(new_field.id, 4); + assert!(!new_field.required); + assert_eq!(*new_field.field_type, Type::Primitive(PrimitiveType::Int)); + assert!(new_field.doc.is_none()); + + // Original fields should still be there. + assert!(new_schema.field_by_name("x").is_some()); + assert!(new_schema.field_by_name("y").is_some()); + assert!(new_schema.field_by_name("z").is_some()); + + assert_eq!(updates[1], TableUpdate::SetCurrentSchema { schema_id: -1 }); + + // Verify requirement. 
+ assert_eq!(requirements.len(), 1); + assert_eq!(requirements[0], TableRequirement::CurrentSchemaIdMatch { + current_schema_id: table.metadata().current_schema().schema_id() + }); + } + + #[tokio::test] + async fn test_add_column_with_doc() { + let table = make_v2_table(); + let tx = Transaction::new(&table); + + let action = tx.update_schema().add_column_with_doc( + "documented_col", + Type::Primitive(PrimitiveType::String), + "A documented column", + ); + + let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); + let updates = action_commit.take_updates(); + + let new_schema = match &updates[0] { + TableUpdate::AddSchema { schema } => schema, + other => panic!("expected AddSchema, got {:?}", other), + }; + + let field = new_schema + .field_by_name("documented_col") + .expect("documented_col should exist"); + assert_eq!(field.id, 4); + assert!(!field.required); + assert_eq!(field.doc.as_deref(), Some("A documented column")); + } + + #[tokio::test] + async fn test_add_required_column_with_initial_default() { + let table = make_v2_table(); + let tx = Transaction::new(&table); + + let action = tx.update_schema().add_required_column( + "req_col", + Type::Primitive(PrimitiveType::Int), + Literal::int(0), + ); + + let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); + let updates = action_commit.take_updates(); + + let new_schema = match &updates[0] { + TableUpdate::AddSchema { schema } => schema, + other => panic!("expected AddSchema, got {:?}", other), + }; + + let field = new_schema + .field_by_name("req_col") + .expect("req_col should exist"); + assert_eq!(field.id, 4); + assert!(field.required); + assert_eq!(field.initial_default, Some(Literal::int(0))); + } + + #[tokio::test] + async fn test_add_column_name_conflict_fails() { + let table = make_v2_table(); + let tx = Transaction::new(&table); + + // "x" already exists in the V2 test schema. 
+ let action = tx + .update_schema() + .add_column("x", Type::Primitive(PrimitiveType::Int)); + + let result = Arc::new(action).commit(&table).await; + let err = match result { + Err(e) => e, + Ok(_) => panic!("should reject adding a column with an existing name"), + }; + assert_eq!(err.kind(), ErrorKind::PreconditionFailed); + assert!( + err.message().contains("already exists"), + "error should mention name conflict, got: {}", + err.message() + ); + } + + #[tokio::test] + async fn test_delete_column() { + let table = make_v2_table(); + let tx = Transaction::new(&table); + + // z is not an identifier field, so we can delete it. + let action = tx.update_schema().delete_column("z"); + + let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); + let updates = action_commit.take_updates(); + + let new_schema = match &updates[0] { + TableUpdate::AddSchema { schema } => schema, + other => panic!("expected AddSchema, got {:?}", other), + }; + + assert!( + new_schema.field_by_name("z").is_none(), + "z should be deleted" + ); + assert!(new_schema.field_by_name("x").is_some()); + assert!(new_schema.field_by_name("y").is_some()); + } + + #[tokio::test] + async fn test_delete_missing_column_fails() { + let table = make_v2_table(); + let tx = Transaction::new(&table); + + let action = tx.update_schema().delete_column("nonexistent"); + + let result = Arc::new(action).commit(&table).await; + let err = match result { + Err(e) => e, + Ok(_) => panic!("should reject deleting a non-existent column"), + }; + assert_eq!(err.kind(), ErrorKind::PreconditionFailed); + assert!( + err.message().contains("nonexistent"), + "error should mention the missing column, got: {}", + err.message() + ); + } + + #[tokio::test] + async fn test_add_and_delete_combined() { + let table = make_v2_table(); + let tx = Transaction::new(&table); + + // Delete z, add a new column. 
+ let action = tx + .update_schema() + .delete_column("z") + .add_column("w", Type::Primitive(PrimitiveType::Boolean)); + + let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); + let updates = action_commit.take_updates(); + + let new_schema = match &updates[0] { + TableUpdate::AddSchema { schema } => schema, + other => panic!("expected AddSchema, got {:?}", other), + }; + + assert!( + new_schema.field_by_name("z").is_none(), + "z should be deleted" + ); + let w = new_schema.field_by_name("w").expect("w should exist"); + assert_eq!(w.id, 4); + assert!(!w.required); + } + + #[tokio::test] + async fn test_delete_and_readd_same_name() { + let table = make_v2_table(); + let tx = Transaction::new(&table); + + // Delete z, then add a new column named z -- should succeed. + let action = tx + .update_schema() + .delete_column("z") + .add_column("z", Type::Primitive(PrimitiveType::Boolean)); + + let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); + let updates = action_commit.take_updates(); + + let new_schema = match &updates[0] { + TableUpdate::AddSchema { schema } => schema, + other => panic!("expected AddSchema, got {:?}", other), + }; + + let z = new_schema + .field_by_name("z") + .expect("z should exist with new type"); + assert_eq!(z.id, 4); // new ID, not the old 3 + assert_eq!(*z.field_type, Type::Primitive(PrimitiveType::Boolean)); + } + + #[test] + fn test_apply() { + let table = make_v2_table(); + let tx = Transaction::new(&table); + + let tx = tx + .update_schema() + .add_column("new_col", Type::Primitive(PrimitiveType::Int)) + .apply(tx) + .unwrap(); + + assert_eq!(tx.actions.len(), 1); + (*tx.actions[0]) + .downcast_ref::() + .expect("UpdateSchemaAction was not applied to Transaction!"); + } + + // ----------------------------------------------------------------------- + // Nested add tests + // ----------------------------------------------------------------------- + + #[tokio::test] + async fn 
test_add_column_to_struct() { + let table = make_v2_table_with_nested(); + let tx = Transaction::new(&table); + + // Add "email" to the "person" struct. + let action = tx.update_schema().add_column_to( + "person", + "email", + Type::Primitive(PrimitiveType::String), + ); + + let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); + let updates = action_commit.take_updates(); + + let new_schema = match &updates[0] { + TableUpdate::AddSchema { schema } => schema, + other => panic!("expected AddSchema, got {:?}", other), + }; + + // "email" should be nested under "person" with ID = last_column_id + 1 = 15. + let email = new_schema + .field_by_name("person.email") + .expect("person.email should exist"); + assert_eq!(email.id, 15); + assert!(!email.required); + assert_eq!(*email.field_type, Type::Primitive(PrimitiveType::String)); + + // Original nested fields should still be there. + assert!(new_schema.field_by_name("person.name").is_some()); + assert!(new_schema.field_by_name("person.age").is_some()); + } + + #[tokio::test] + async fn test_add_column_to_struct_with_doc() { + let table = make_v2_table_with_nested(); + let tx = Transaction::new(&table); + + let action = tx.update_schema().add_column_to_with_doc( + "person", + "phone", + Type::Primitive(PrimitiveType::String), + "Phone number", + ); + + let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); + let updates = action_commit.take_updates(); + + let new_schema = match &updates[0] { + TableUpdate::AddSchema { schema } => schema, + other => panic!("expected AddSchema, got {:?}", other), + }; + + let phone = new_schema + .field_by_name("person.phone") + .expect("person.phone should exist"); + assert_eq!(phone.id, 15); + assert_eq!(phone.doc.as_deref(), Some("Phone number")); + } + + #[tokio::test] + async fn test_add_column_to_list_element_struct() { + let table = make_v2_table_with_nested(); + let tx = Transaction::new(&table); + + // "tags" is a list. 
Adding to the list navigates to its + // element struct automatically. + let action = tx.update_schema().add_column_to( + "tags", + "score", + Type::Primitive(PrimitiveType::Double), + ); + + let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); + let updates = action_commit.take_updates(); + + let new_schema = match &updates[0] { + TableUpdate::AddSchema { schema } => schema, + other => panic!("expected AddSchema, got {:?}", other), + }; + + // The list element struct should now contain "score". + let score = new_schema + .field_by_name("tags.element.score") + .expect("tags.element.score should exist"); + assert_eq!(score.id, 15); + assert!(!score.required); + + // Existing fields preserved. + assert!(new_schema.field_by_name("tags.element.key").is_some()); + assert!(new_schema.field_by_name("tags.element.value").is_some()); + } + + #[tokio::test] + async fn test_add_column_to_map_value_struct() { + let table = make_v2_table_with_nested(); + let tx = Transaction::new(&table); + + // "props" is a map. Adding to the map navigates to its + // value struct automatically. + let action = tx.update_schema().add_column_to( + "props", + "version", + Type::Primitive(PrimitiveType::Int), + ); + + let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); + let updates = action_commit.take_updates(); + + let new_schema = match &updates[0] { + TableUpdate::AddSchema { schema } => schema, + other => panic!("expected AddSchema, got {:?}", other), + }; + + let version = new_schema + .field_by_name("props.value.version") + .expect("props.value.version should exist"); + assert_eq!(version.id, 15); + + // Existing map value fields preserved. 
+ assert!(new_schema.field_by_name("props.value.data").is_some()); + } + + #[tokio::test] + async fn test_add_column_to_nonexistent_parent_fails() { + let table = make_v2_table_with_nested(); + let tx = Transaction::new(&table); + + let action = tx.update_schema().add_column_to( + "nonexistent", + "col", + Type::Primitive(PrimitiveType::Int), + ); + + let err = match Arc::new(action).commit(&table).await { + Err(e) => e, + Ok(_) => panic!("should reject adding to a nonexistent parent"), + }; + assert_eq!(err.kind(), ErrorKind::PreconditionFailed); + assert!( + err.message().contains("nonexistent"), + "error should mention the missing parent, got: {}", + err.message() + ); + } + + #[tokio::test] + async fn test_add_column_to_primitive_parent_fails() { + let table = make_v2_table_with_nested(); + let tx = Transaction::new(&table); + + // "x" is a primitive (long), not a struct. + let action = + tx.update_schema() + .add_column_to("x", "col", Type::Primitive(PrimitiveType::Int)); + + let err = match Arc::new(action).commit(&table).await { + Err(e) => e, + Ok(_) => panic!("should reject adding to a primitive parent"), + }; + assert_eq!(err.kind(), ErrorKind::PreconditionFailed); + assert!( + err.message().contains("not a struct"), + "error should mention type mismatch, got: {}", + err.message() + ); + } + + #[tokio::test] + async fn test_add_column_to_nested_name_conflict_fails() { + let table = make_v2_table_with_nested(); + let tx = Transaction::new(&table); + + // "name" already exists in the "person" struct. 
+ let action = tx.update_schema().add_column_to( + "person", + "name", + Type::Primitive(PrimitiveType::String), + ); + + let err = match Arc::new(action).commit(&table).await { + Err(e) => e, + Ok(_) => panic!("should reject adding a column with conflicting name"), + }; + assert_eq!(err.kind(), ErrorKind::PreconditionFailed); + assert!( + err.message().contains("already exists"), + "error should mention name conflict, got: {}", + err.message() + ); + } + + #[tokio::test] + async fn test_root_and_nested_add_combined() { + let table = make_v2_table_with_nested(); + let tx = Transaction::new(&table); + + // Add a root column and a nested column in the same action. + let action = tx + .update_schema() + .add_column("root_col", Type::Primitive(PrimitiveType::Boolean)) + .add_column_to("person", "email", Type::Primitive(PrimitiveType::String)); + + let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); + let updates = action_commit.take_updates(); + + let new_schema = match &updates[0] { + TableUpdate::AddSchema { schema } => schema, + other => panic!("expected AddSchema, got {:?}", other), + }; + + // Root column gets the first fresh ID. + let root_col = new_schema + .field_by_name("root_col") + .expect("root_col should exist"); + assert_eq!(root_col.id, 15); + + // Nested column gets the next ID. + let email = new_schema + .field_by_name("person.email") + .expect("person.email should exist"); + assert_eq!(email.id, 16); + } + + #[tokio::test] + async fn test_add_nested_struct_type_with_fresh_ids() { + // Exercises the assign_fresh_ids bug fix: adding a new column whose TYPE + // contains nested fields (e.g. a struct column). All sub-fields must receive + // fresh IDs, not placeholder 0. 
+ let table = make_v2_table(); + let tx = Transaction::new(&table); + + let action = tx.update_schema().add_column( + "address", + Type::Struct(StructType::new(vec![ + NestedField::optional(0, "street", Type::Primitive(PrimitiveType::String)).into(), + NestedField::optional(0, "city", Type::Primitive(PrimitiveType::String)).into(), + ])), + ); + + let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); + let updates = action_commit.take_updates(); + + let new_schema = match &updates[0] { + TableUpdate::AddSchema { schema } => schema, + other => panic!("expected AddSchema, got {:?}", other), + }; + + // "address" gets ID 4 (last_column_id=3, +1). + let address = new_schema + .field_by_name("address") + .expect("address should exist"); + assert_eq!(address.id, 4); + + // Sub-fields get IDs 5 and 6. + let street = new_schema + .field_by_name("address.street") + .expect("address.street should exist"); + assert_eq!(street.id, 5); + + let city = new_schema + .field_by_name("address.city") + .expect("address.city should exist"); + assert_eq!(city.id, 6); + } +} diff --git a/crates/integration_tests/tests/update_schema.rs b/crates/integration_tests/tests/update_schema.rs new file mode 100644 index 0000000000..87a1d63f16 --- /dev/null +++ b/crates/integration_tests/tests/update_schema.rs @@ -0,0 +1,409 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Integration tests for the `UpdateSchemaAction`. + +mod common; + +use std::sync::Arc; + +use arrow_array::{ArrayRef, BooleanArray, Int32Array, RecordBatch, StringArray, StructArray}; +use common::{random_ns, test_schema}; +use futures::TryStreamExt; +use iceberg::transaction::{ApplyTransactionAction, Transaction}; +use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder; +use iceberg::writer::file_writer::ParquetWriterBuilder; +use iceberg::writer::file_writer::location_generator::{ + DefaultFileNameGenerator, DefaultLocationGenerator, +}; +use iceberg::writer::file_writer::rolling_writer::RollingFileWriterBuilder; +use iceberg::writer::{IcebergWriter, IcebergWriterBuilder}; +use iceberg::{Catalog, CatalogBuilder, TableCreation}; +use iceberg_catalog_rest::RestCatalogBuilder; +use iceberg_integration_tests::get_test_fixture; +use parquet::arrow::arrow_reader::ArrowReaderOptions; +use parquet::file::properties::WriterProperties; + +/// Creates a table, appends data, adds a new field to the schema, +/// verifies existing data is still readable, then appends data with the new schema. 
+#[tokio::test] +async fn test_add_field() { + let fixture = get_test_fixture(); + let rest_catalog = RestCatalogBuilder::default() + .load("rest", fixture.catalog_config.clone()) + .await + .unwrap(); + let ns = random_ns().await; + let schema = test_schema(); + + let table_creation = TableCreation::builder() + .name("t1".to_string()) + .schema(schema.clone()) + .build(); + + let table = rest_catalog + .create_table(ns.name(), table_creation) + .await + .unwrap(); + + // Create the writer and write initial data + let arrow_schema: Arc = Arc::new( + table + .metadata() + .current_schema() + .as_ref() + .try_into() + .unwrap(), + ); + let location_generator = DefaultLocationGenerator::new(table.metadata().clone()).unwrap(); + let file_name_generator = DefaultFileNameGenerator::new( + "test".to_string(), + None, + iceberg::spec::DataFileFormat::Parquet, + ); + let parquet_writer_builder = ParquetWriterBuilder::new( + WriterProperties::default(), + table.metadata().current_schema().clone(), + ); + let rolling_file_writer_builder = RollingFileWriterBuilder::new_with_default_file_size( + parquet_writer_builder, + table.file_io().clone(), + location_generator.clone(), + file_name_generator.clone(), + ); + let data_file_writer_builder = DataFileWriterBuilder::new(rolling_file_writer_builder); + let mut data_file_writer = data_file_writer_builder.build(None).await.unwrap(); + let col1 = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]); + let col2 = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4)]); + let col3 = BooleanArray::from(vec![Some(true), Some(false), None, Some(false)]); + let batch = RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(col1) as ArrayRef, + Arc::new(col2) as ArrayRef, + Arc::new(col3) as ArrayRef, + ]) + .unwrap(); + data_file_writer.write(batch.clone()).await.unwrap(); + let data_file = data_file_writer.close().await.unwrap(); + + // Check parquet file schema has the expected field IDs + let content = table + 
.file_io() + .new_input(data_file[0].file_path()) + .unwrap() + .read() + .await + .unwrap(); + let parquet_reader = parquet::arrow::arrow_reader::ArrowReaderMetadata::load( + &content, + ArrowReaderOptions::default(), + ) + .unwrap(); + let field_ids: Vec = parquet_reader + .parquet_schema() + .columns() + .iter() + .map(|col| col.self_type().get_basic_info().id()) + .collect(); + assert_eq!(field_ids, vec![1, 2, 3]); + + // Commit the initial data + let tx = Transaction::new(&table); + let append_action = tx.fast_append().add_data_files(data_file.clone()); + let tx = append_action.apply(tx).unwrap(); + let table = tx.commit(&rest_catalog).await.unwrap(); + + // Verify the initial data is readable + let batch_stream = table + .scan() + .select_all() + .build() + .unwrap() + .to_arrow() + .await + .unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + assert_eq!(batches.len(), 1); + assert_eq!(batches[0], batch); + + // Add a new optional primitive field to the table + let tx = Transaction::new(&table); + let add_action = tx.update_schema().add_column( + "a", + iceberg::spec::Type::Primitive(iceberg::spec::PrimitiveType::Int), + ); + let tx = add_action.apply(tx).unwrap(); + let table = tx.commit(&rest_catalog).await.unwrap(); + + // Verify existing data is still readable after schema evolution + let batch_stream = table + .scan() + .select_all() + .build() + .unwrap() + .to_arrow() + .await + .unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + assert_eq!(batches.len(), 1); + assert_eq!(batches[0], batch); + + // Add a struct column, then add a nested column inside it + let tx = Transaction::new(&table); + let add_action = tx.update_schema().add_column( + "info", + iceberg::spec::Type::Struct(iceberg::spec::StructType::new(vec![Arc::new( + iceberg::spec::NestedField::optional( + 0, + "city", + iceberg::spec::Type::Primitive(iceberg::spec::PrimitiveType::String), + ), + )])), + ); + let tx = 
add_action.apply(tx).unwrap(); + let table = tx.commit(&rest_catalog).await.unwrap(); + + // Verify the struct column was added + let schema = table.metadata().current_schema(); + let info_field = schema.field_by_name("info").expect("info field should exist"); + assert!(matches!(info_field.field_type.as_ref(), iceberg::spec::Type::Struct(_))); + let city_field = schema + .field_by_name("info.city") + .expect("info.city field should exist"); + assert!(matches!( + city_field.field_type.as_ref(), + iceberg::spec::Type::Primitive(iceberg::spec::PrimitiveType::String) + )); + + // Add a nested column to the struct + let tx = Transaction::new(&table); + let add_action = tx.update_schema().add_column_to( + "info", + "zip", + iceberg::spec::Type::Primitive(iceberg::spec::PrimitiveType::String), + ); + let tx = add_action.apply(tx).unwrap(); + let table = tx.commit(&rest_catalog).await.unwrap(); + + // Verify the nested column was added + let schema = table.metadata().current_schema(); + let zip_field = schema + .field_by_name("info.zip") + .expect("info.zip field should exist"); + assert!(matches!( + zip_field.field_type.as_ref(), + iceberg::spec::Type::Primitive(iceberg::spec::PrimitiveType::String) + )); + + // Verify existing data is still readable + let batch_stream = table + .scan() + .select_all() + .build() + .unwrap() + .to_arrow() + .await + .unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + assert_eq!(batches.len(), 1); + + // Create a new writer with the evolved schema and write data including the new field + let parquet_writer_builder = ParquetWriterBuilder::new( + WriterProperties::default(), + table.metadata().current_schema().clone(), + ); + let rolling_file_writer_builder = RollingFileWriterBuilder::new_with_default_file_size( + parquet_writer_builder, + table.file_io().clone(), + location_generator.clone(), + file_name_generator.clone(), + ); + let data_file_writer_builder = 
DataFileWriterBuilder::new(rolling_file_writer_builder); + let mut data_file_writer = data_file_writer_builder.build(None).await.unwrap(); + let col1 = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]); + let col2 = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4)]); + let col3 = BooleanArray::from(vec![Some(true), Some(false), None, Some(false)]); + let col4 = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4)]); + let evolved_arrow_schema: Arc = Arc::new( + table + .metadata() + .current_schema() + .as_ref() + .try_into() + .unwrap(), + ); + // Build a struct array for the "info" column: {city, zip} + let city_array = StringArray::from(vec![Some("NYC"), Some("LA"), None, Some("SF")]); + let zip_array = StringArray::from(vec![Some("10001"), None, Some("90001"), Some("94101")]); + let info_fields = evolved_arrow_schema + .field_with_name("info") + .unwrap() + .data_type() + .clone(); + let struct_fields = match &info_fields { + arrow_schema::DataType::Struct(fields) => fields.clone(), + _ => panic!("expected struct type for info"), + }; + let info_array = StructArray::try_new( + struct_fields, + vec![ + Arc::new(city_array) as ArrayRef, + Arc::new(zip_array) as ArrayRef, + ], + None, + ) + .unwrap(); + let batch_with_new_field = RecordBatch::try_new(evolved_arrow_schema.clone(), vec![ + Arc::new(col1) as ArrayRef, + Arc::new(col2) as ArrayRef, + Arc::new(col3) as ArrayRef, + Arc::new(col4) as ArrayRef, + Arc::new(info_array) as ArrayRef, + ]) + .unwrap(); + data_file_writer + .write(batch_with_new_field.clone()) + .await + .unwrap(); + let data_file = data_file_writer.close().await.unwrap(); + + // Commit the new data with evolved schema + let tx = Transaction::new(&table); + let append_action = tx.fast_append().add_data_files(data_file.clone()); + let tx = append_action.apply(tx).unwrap(); + let _table = tx.commit(&rest_catalog).await.unwrap(); +} + +/// Creates a table, adds data, deletes a non-identifier column, +/// and verifies 
the schema was updated and existing data is still readable. +#[tokio::test] +async fn test_delete_field() { + let fixture = get_test_fixture(); + let rest_catalog = RestCatalogBuilder::default() + .load("rest", fixture.catalog_config.clone()) + .await + .unwrap(); + let ns = random_ns().await; + let schema = test_schema(); + + let table_creation = TableCreation::builder() + .name("t_delete".to_string()) + .schema(schema.clone()) + .build(); + + let table = rest_catalog + .create_table(ns.name(), table_creation) + .await + .unwrap(); + + // Write initial data with all three columns + let arrow_schema: Arc = Arc::new( + table + .metadata() + .current_schema() + .as_ref() + .try_into() + .unwrap(), + ); + let location_generator = DefaultLocationGenerator::new(table.metadata().clone()).unwrap(); + let file_name_generator = DefaultFileNameGenerator::new( + "test".to_string(), + None, + iceberg::spec::DataFileFormat::Parquet, + ); + let parquet_writer_builder = ParquetWriterBuilder::new( + WriterProperties::default(), + table.metadata().current_schema().clone(), + ); + let rolling_file_writer_builder = RollingFileWriterBuilder::new_with_default_file_size( + parquet_writer_builder, + table.file_io().clone(), + location_generator.clone(), + file_name_generator.clone(), + ); + let data_file_writer_builder = DataFileWriterBuilder::new(rolling_file_writer_builder); + let mut data_file_writer = data_file_writer_builder.build(None).await.unwrap(); + let col1 = StringArray::from(vec![Some("foo"), Some("bar")]); + let col2 = Int32Array::from(vec![Some(1), Some(2)]); + let col3 = BooleanArray::from(vec![Some(true), Some(false)]); + let batch = RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(col1) as ArrayRef, + Arc::new(col2) as ArrayRef, + Arc::new(col3) as ArrayRef, + ]) + .unwrap(); + data_file_writer.write(batch.clone()).await.unwrap(); + let data_file = data_file_writer.close().await.unwrap(); + + // Commit the initial data + let tx = Transaction::new(&table); + 
let append_action = tx.fast_append().add_data_files(data_file.clone()); + let tx = append_action.apply(tx).unwrap(); + let table = tx.commit(&rest_catalog).await.unwrap(); + + // Delete the optional "baz" column (field 3, not an identifier) + let tx = Transaction::new(&table); + let delete_action = tx.update_schema().delete_column("baz"); + let tx = delete_action.apply(tx).unwrap(); + let table = tx.commit(&rest_catalog).await.unwrap(); + + // Verify the schema no longer contains "baz" + let schema = table.metadata().current_schema(); + assert!( + schema.field_by_name("baz").is_none(), + "baz should have been deleted" + ); + assert!( + schema.field_by_name("foo").is_some(), + "foo should still exist" + ); + assert!( + schema.field_by_name("bar").is_some(), + "bar should still exist" + ); + + // Verify existing data is still readable after the column deletion + let batch_stream = table + .scan() + .select_all() + .build() + .unwrap() + .to_arrow() + .await + .unwrap(); + let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + assert_eq!(batches.len(), 1); + + // Deleting an identifier field should fail + let tx = Transaction::new(&table); + let delete_action = tx.update_schema().delete_column("bar"); + let tx = delete_action.apply(tx).unwrap(); + let result = tx.commit(&rest_catalog).await; + assert!( + result.is_err(), + "deleting an identifier field should fail" + ); + + // Deleting a non-existent column should fail + let tx = Transaction::new(&table); + let delete_action = tx.update_schema().delete_column("nonexistent"); + let tx = delete_action.apply(tx).unwrap(); + let result = tx.commit(&rest_catalog).await; + assert!( + result.is_err(), + "deleting a non-existent column should fail" + ); +} From 6829c7fc38c5fe6bf661c7a0de7a10b54d5b89eb Mon Sep 17 00:00:00 2001 From: Victor Ghita Date: Tue, 10 Feb 2026 14:41:36 +0200 Subject: [PATCH 02/15] Cargo format update --- crates/integration_tests/tests/update_schema.rs | 14 ++++++++------ 1 file changed, 8 
insertions(+), 6 deletions(-) diff --git a/crates/integration_tests/tests/update_schema.rs b/crates/integration_tests/tests/update_schema.rs index 87a1d63f16..789a55ad69 100644 --- a/crates/integration_tests/tests/update_schema.rs +++ b/crates/integration_tests/tests/update_schema.rs @@ -178,8 +178,13 @@ async fn test_add_field() { // Verify the struct column was added let schema = table.metadata().current_schema(); - let info_field = schema.field_by_name("info").expect("info field should exist"); - assert!(matches!(info_field.field_type.as_ref(), iceberg::spec::Type::Struct(_))); + let info_field = schema + .field_by_name("info") + .expect("info field should exist"); + assert!(matches!( + info_field.field_type.as_ref(), + iceberg::spec::Type::Struct(_) + )); let city_field = schema .field_by_name("info.city") .expect("info.city field should exist"); @@ -392,10 +397,7 @@ async fn test_delete_field() { let delete_action = tx.update_schema().delete_column("bar"); let tx = delete_action.apply(tx).unwrap(); let result = tx.commit(&rest_catalog).await; - assert!( - result.is_err(), - "deleting an identifier field should fail" - ); + assert!(result.is_err(), "deleting an identifier field should fail"); // Deleting a non-existent column should fail let tx = Transaction::new(&table); From 0e5d3e67e2dc1a3bf6ec3203f5cb75787ddfb8bc Mon Sep 17 00:00:00 2001 From: Victor Ghita Date: Tue, 10 Feb 2026 14:52:06 +0200 Subject: [PATCH 03/15] Fix clippy! --- .../iceberg/src/transaction/update_schema.rs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/crates/iceberg/src/transaction/update_schema.rs b/crates/iceberg/src/transaction/update_schema.rs index 3caa275be1..c828d4e438 100644 --- a/crates/iceberg/src/transaction/update_schema.rs +++ b/crates/iceberg/src/transaction/update_schema.rs @@ -703,7 +703,7 @@ mod tests { // Extract the new schema from the AddSchema update. 
let new_schema = match &updates[0] { TableUpdate::AddSchema { schema } => schema, - other => panic!("expected AddSchema, got {:?}", other), + other => panic!("expected AddSchema, got {other:?}"), }; // The new field should have ID = last_column_id + 1 = 4. @@ -745,7 +745,7 @@ mod tests { let new_schema = match &updates[0] { TableUpdate::AddSchema { schema } => schema, - other => panic!("expected AddSchema, got {:?}", other), + other => panic!("expected AddSchema, got {other:?}"), }; let field = new_schema @@ -772,7 +772,7 @@ mod tests { let new_schema = match &updates[0] { TableUpdate::AddSchema { schema } => schema, - other => panic!("expected AddSchema, got {:?}", other), + other => panic!("expected AddSchema, got {other:?}"), }; let field = new_schema @@ -819,7 +819,7 @@ mod tests { let new_schema = match &updates[0] { TableUpdate::AddSchema { schema } => schema, - other => panic!("expected AddSchema, got {:?}", other), + other => panic!("expected AddSchema, got {other:?}"), }; assert!( @@ -866,7 +866,7 @@ mod tests { let new_schema = match &updates[0] { TableUpdate::AddSchema { schema } => schema, - other => panic!("expected AddSchema, got {:?}", other), + other => panic!("expected AddSchema, got {other:?}"), }; assert!( @@ -894,7 +894,7 @@ mod tests { let new_schema = match &updates[0] { TableUpdate::AddSchema { schema } => schema, - other => panic!("expected AddSchema, got {:?}", other), + other => panic!("expected AddSchema, got {other:?}"), }; let z = new_schema @@ -942,7 +942,7 @@ mod tests { let new_schema = match &updates[0] { TableUpdate::AddSchema { schema } => schema, - other => panic!("expected AddSchema, got {:?}", other), + other => panic!("expected AddSchema, got {other:?}"), }; // "email" should be nested under "person" with ID = last_column_id + 1 = 15. 
@@ -975,7 +975,7 @@ mod tests { let new_schema = match &updates[0] { TableUpdate::AddSchema { schema } => schema, - other => panic!("expected AddSchema, got {:?}", other), + other => panic!("expected AddSchema, got {other:?}"), }; let phone = new_schema @@ -1003,7 +1003,7 @@ mod tests { let new_schema = match &updates[0] { TableUpdate::AddSchema { schema } => schema, - other => panic!("expected AddSchema, got {:?}", other), + other => panic!("expected AddSchema, got {other:?}"), }; // The list element struct should now contain "score". @@ -1036,7 +1036,7 @@ mod tests { let new_schema = match &updates[0] { TableUpdate::AddSchema { schema } => schema, - other => panic!("expected AddSchema, got {:?}", other), + other => panic!("expected AddSchema, got {other:?}"), }; let version = new_schema @@ -1133,7 +1133,7 @@ mod tests { let new_schema = match &updates[0] { TableUpdate::AddSchema { schema } => schema, - other => panic!("expected AddSchema, got {:?}", other), + other => panic!("expected AddSchema, got {other:?}"), }; // Root column gets the first fresh ID. @@ -1170,7 +1170,7 @@ mod tests { let new_schema = match &updates[0] { TableUpdate::AddSchema { schema } => schema, - other => panic!("expected AddSchema, got {:?}", other), + other => panic!("expected AddSchema, got {other:?}"), }; // "address" gets ID 4 (last_column_id=3, +1). 
From 86e514238122ccaca5e7cd2ebc651e1690d9b39f Mon Sep 17 00:00:00 2001 From: Victor Ghita Date: Mon, 9 Mar 2026 14:32:53 +0200 Subject: [PATCH 04/15] Add write_default to new fields --- crates/iceberg/src/transaction/update_schema.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/crates/iceberg/src/transaction/update_schema.rs b/crates/iceberg/src/transaction/update_schema.rs index c828d4e438..33a4fc2dc5 100644 --- a/crates/iceberg/src/transaction/update_schema.rs +++ b/crates/iceberg/src/transaction/update_schema.rs @@ -114,7 +114,9 @@ impl UpdateSchemaAction { initial_default: Literal, ) -> Self { self.add_field(Arc::new( - NestedField::required(0, name, field_type).with_initial_default(initial_default), + NestedField::required(0, name, field_type) + .with_initial_default(initial_default.clone()) + .with_write_default(initial_default), )) } @@ -132,7 +134,8 @@ impl UpdateSchemaAction { ) -> Self { self.add_field(Arc::new( NestedField::required(0, name, field_type) - .with_initial_default(initial_default) + .with_initial_default(initial_default.clone()) + .with_write_default(initial_default) .with_doc(doc), )) } @@ -192,7 +195,9 @@ impl UpdateSchemaAction { self.add_field_to( parent, Arc::new( - NestedField::required(0, name, field_type).with_initial_default(initial_default), + NestedField::required(0, name, field_type) + .with_initial_default(initial_default.clone()) + .with_write_default(initial_default), ), ) } @@ -213,7 +218,8 @@ impl UpdateSchemaAction { parent, Arc::new( NestedField::required(0, name, field_type) - .with_initial_default(initial_default) + .with_initial_default(initial_default.clone()) + .with_write_default(initial_default) .with_doc(doc), ), ) @@ -781,6 +787,7 @@ mod tests { assert_eq!(field.id, 4); assert!(field.required); assert_eq!(field.initial_default, Some(Literal::int(0))); + assert_eq!(field.write_default, Some(Literal::int(0))); } #[tokio::test] From cc92962635fc96b93b9e1ea81c7b7e62d3698806 Mon 
Sep 17 00:00:00 2001 From: Victor Ghita Date: Mon, 9 Mar 2026 14:55:56 +0200 Subject: [PATCH 05/15] Fix new FileIO api --- crates/iceberg/src/transaction/update_schema.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/src/transaction/update_schema.rs b/crates/iceberg/src/transaction/update_schema.rs index 33a4fc2dc5..b1ce9f3496 100644 --- a/crates/iceberg/src/transaction/update_schema.rs +++ b/crates/iceberg/src/transaction/update_schema.rs @@ -682,7 +682,7 @@ mod tests { .metadata(metadata) .metadata_location("s3://bucket/test/location/metadata/v1.json".to_string()) .identifier(TableIdent::from_strs(["ns1", "test1"]).unwrap()) - .file_io(crate::io::FileIOBuilder::new("memory").build().unwrap()) + .file_io(crate::io::FileIO::new_with_memory()) .build() .unwrap() } From 22e7bb851bef04dfc7fcdd67b4074e8141fd5ee5 Mon Sep 17 00:00:00 2001 From: Victor Ghita Date: Mon, 9 Mar 2026 15:13:16 +0200 Subject: [PATCH 06/15] Add integration test storage factory --- crates/integration_tests/tests/update_schema.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/crates/integration_tests/tests/update_schema.rs b/crates/integration_tests/tests/update_schema.rs index 789a55ad69..17b106f669 100644 --- a/crates/integration_tests/tests/update_schema.rs +++ b/crates/integration_tests/tests/update_schema.rs @@ -35,6 +35,7 @@ use iceberg::writer::{IcebergWriter, IcebergWriterBuilder}; use iceberg::{Catalog, CatalogBuilder, TableCreation}; use iceberg_catalog_rest::RestCatalogBuilder; use iceberg_integration_tests::get_test_fixture; +use iceberg_storage_opendal::OpenDalStorageFactory; use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::file::properties::WriterProperties; @@ -44,6 +45,10 @@ use parquet::file::properties::WriterProperties; async fn test_add_field() { let fixture = get_test_fixture(); let rest_catalog = RestCatalogBuilder::default() + .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 { + configured_scheme: 
"s3".to_string(), + customized_credential_load: None, + })) .load("rest", fixture.catalog_config.clone()) .await .unwrap(); @@ -298,6 +303,10 @@ async fn test_add_field() { async fn test_delete_field() { let fixture = get_test_fixture(); let rest_catalog = RestCatalogBuilder::default() + .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 { + configured_scheme: "s3".to_string(), + customized_credential_load: None, + })) .load("rest", fixture.catalog_config.clone()) .await .unwrap(); From 35b0dfa342daac65fd342d3a8e7eef4c071255e0 Mon Sep 17 00:00:00 2001 From: Victor Ghita Date: Mon, 16 Mar 2026 14:09:28 +0200 Subject: [PATCH 07/15] add extra delete check for parent --- crates/iceberg/src/transaction/update_schema.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/iceberg/src/transaction/update_schema.rs b/crates/iceberg/src/transaction/update_schema.rs index b1ce9f3496..b8c9845e59 100644 --- a/crates/iceberg/src/transaction/update_schema.rs +++ b/crates/iceberg/src/transaction/update_schema.rs @@ -512,11 +512,11 @@ impl TransactionAction for UpdateSchemaAction { let (parent_id, parent_struct) = resolve_parent_target(base_schema, parent_path)?; - if parent_struct - .fields() - .iter() - .any(|f| f.name == pending.field.name && !delete_ids.contains(&f.id)) - { + if parent_struct.fields().iter().any(|f| { + f.name == pending.field.name + && !delete_ids.contains(&f.id) + && !delete_ids.contains(&parent_id) + }) { return Err(Error::new( ErrorKind::PreconditionFailed, format!( From 2216f3cabae85677652a890ec38cbabd5ea7e70f Mon Sep 17 00:00:00 2001 From: Victor Ghita Date: Mon, 16 Mar 2026 15:59:20 +0200 Subject: [PATCH 08/15] Refactor AddColumn API --- crates/iceberg/src/transaction/mod.rs | 1 + .../iceberg/src/transaction/update_schema.rs | 391 ++++++++---------- .../integration_tests/tests/update_schema.rs | 20 +- 3 files changed, 176 insertions(+), 236 deletions(-) diff --git a/crates/iceberg/src/transaction/mod.rs 
b/crates/iceberg/src/transaction/mod.rs index 1cd63e2221..c865b5033b 100644 --- a/crates/iceberg/src/transaction/mod.rs +++ b/crates/iceberg/src/transaction/mod.rs @@ -66,6 +66,7 @@ use std::sync::Arc; use std::time::Duration; use backon::{BackoffBuilder, ExponentialBackoff, ExponentialBuilder, RetryableWithContext}; +pub use update_schema::AddColumn; use crate::error::Result; use crate::spec::TableProperties; diff --git a/crates/iceberg/src/transaction/update_schema.rs b/crates/iceberg/src/transaction/update_schema.rs index b8c9845e59..5d0b435d8d 100644 --- a/crates/iceberg/src/transaction/update_schema.rs +++ b/crates/iceberg/src/transaction/update_schema.rs @@ -19,6 +19,7 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; use async_trait::async_trait; +use typed_builder::TypedBuilder; use crate::spec::{ ListType, Literal, MapType, NestedField, NestedFieldRef, Schema, StructType, Type, @@ -29,14 +30,78 @@ use crate::{Error, ErrorKind, Result, TableRequirement, TableUpdate}; /// Sentinel parent ID representing the table root (top-level columns). const TABLE_ROOT_ID: i32 = -1; +// Default ID for a new column. This will be re-assigned to a fresh ID at commit time. +const DEFAULT_ID: i32 = 0; -/// A pending column addition, recording the parent path and the field to add. -struct PendingAdd { - /// `None` means a root-level addition; `Some("person")` or `Some("person.address")` - /// identifies the nested struct to add the column to. +#[derive(TypedBuilder)] +/// Declarative specification for adding a column in [`UpdateSchemaAction`]. +/// +/// Use helper constructors such as [`AddColumn::optional`] and [`AddColumn::required`], +/// optionally combined with [`AddColumn::with_parent`] and [`AddColumn::with_doc`], then pass +/// the value to +/// [`UpdateSchemaAction::add_column`]. +pub struct AddColumn { + #[builder(default = None, setter(strip_option, into))] parent: Option, - /// The field to add. 
Uses placeholder ID `0` which is auto-assigned at commit time. - field: NestedFieldRef, + #[builder(setter(into))] + name: String, + #[builder(default = false)] + required: bool, + field_type: Type, + #[builder(default = None, setter(strip_option, into))] + doc: Option, + #[builder(default = None, setter(strip_option))] + initial_default: Option, + #[builder(default = None, setter(strip_option))] + write_default: Option, +} + +impl AddColumn { + /// Create a root-level optional column specification. + pub fn optional(name: impl ToString, field_type: Type) -> Self { + Self::builder() + .name(name.to_string()) + .field_type(field_type) + .required(false) + .build() + } + + /// Create a root-level required column specification. + pub fn required(name: impl ToString, field_type: Type, initial_default: Literal) -> Self { + Self::builder() + .name(name.to_string()) + .field_type(field_type) + .required(true) + .initial_default(initial_default.clone()) + .write_default(initial_default) + .build() + } + + /// Return a copy with an updated parent path. + pub fn with_parent(mut self, parent: impl ToString) -> Self { + self.parent = Some(parent.to_string()); + self + } + + /// Return a copy with an updated doc string. + pub fn with_doc(mut self, doc: impl ToString) -> Self { + self.doc = Some(doc.to_string()); + self + } + + fn to_nested_field(&self) -> NestedFieldRef { + let mut field = NestedField::new( + DEFAULT_ID, + self.name.clone(), + self.field_type.clone(), + self.required, + ); + + field.doc = self.doc.clone(); + field.initial_default = self.initial_default.clone(); + field.write_default = self.write_default.clone(); + Arc::new(field) + } } /// Schema evolution API modeled after the Java `SchemaUpdate` implementation. 
@@ -52,14 +117,17 @@ struct PendingAdd { /// ```ignore /// let tx = Transaction::new(&table); /// let action = tx.update_schema() -/// .add_column("new_col", Type::Primitive(PrimitiveType::Int)) -/// .add_column_to("person", "email", Type::Primitive(PrimitiveType::String)) +/// .add_column(AddColumn::optional("new_col", Type::Primitive(PrimitiveType::Int))) +/// .add_column( +/// AddColumn::optional("email", Type::Primitive(PrimitiveType::String)) +/// .with_parent("person") +/// ) /// .delete_column("old_col"); /// let tx = action.apply(tx).unwrap(); /// let table = tx.commit(&catalog).await.unwrap(); /// ``` pub struct UpdateSchemaAction { - additions: Vec, + additions: Vec, deletes: Vec, auto_assign_ids: bool, } @@ -76,153 +144,14 @@ impl UpdateSchemaAction { // --- Root-level additions --- - /// Add a `NestedFieldRef` column to the table root. - pub fn add_field(self, field: NestedFieldRef) -> Self { - self.add_field_internal(None, field) - } - - /// Add an optional column to the table root. - /// - /// The field ID is a placeholder (`0`) and will be auto-assigned at commit time. - pub fn add_column(self, name: impl ToString, field_type: Type) -> Self { - self.add_field(Arc::new(NestedField::optional(0, name, field_type))) - } - - /// Add an optional column with a doc string to the table root. - /// - /// The field ID is a placeholder (`0`) and will be auto-assigned at commit time. - pub fn add_column_with_doc( - self, - name: impl ToString, - field_type: Type, - doc: impl ToString, - ) -> Self { - self.add_field(Arc::new( - NestedField::optional(0, name, field_type).with_doc(doc), - )) - } - - /// Add a required column to the table root. - /// - /// An `initial_default` value is required per the Iceberg spec: it is used to populate - /// this field for all records that were written before the field was added. - /// The field ID is a placeholder (`0`) and will be auto-assigned at commit time. 
- pub fn add_required_column( - self, - name: impl ToString, - field_type: Type, - initial_default: Literal, - ) -> Self { - self.add_field(Arc::new( - NestedField::required(0, name, field_type) - .with_initial_default(initial_default.clone()) - .with_write_default(initial_default), - )) - } - - /// Add a required column with a doc string to the table root. - /// - /// An `initial_default` value is required per the Iceberg spec: it is used to populate - /// this field for all records that were written before the field was added. - /// The field ID is a placeholder (`0`) and will be auto-assigned at commit time. - pub fn add_required_column_with_doc( - self, - name: impl ToString, - field_type: Type, - initial_default: Literal, - doc: impl ToString, - ) -> Self { - self.add_field(Arc::new( - NestedField::required(0, name, field_type) - .with_initial_default(initial_default.clone()) - .with_write_default(initial_default) - .with_doc(doc), - )) - } - - // --- Nested additions --- - - /// Add a `NestedFieldRef` column under a parent struct identified by name. - /// - /// If the parent is a map, the column is added to the map value's struct. - /// If the parent is a list, the column is added to the list element's struct. - pub fn add_field_to(self, parent: impl ToString, field: NestedFieldRef) -> Self { - self.add_field_internal(Some(parent.to_string()), field) - } - - /// Add an optional column under a parent struct identified by name. - /// - /// The `parent` can be a dotted path (e.g. `"person"` or `"person.address"`). - /// If the parent is a map, the column is added to the map value's struct. - /// If the parent is a list, the column is added to the list element's struct. - /// The field ID is a placeholder (`0`) and will be auto-assigned at commit time. 
- pub fn add_column_to( - self, - parent: impl ToString, - name: impl ToString, - field_type: Type, - ) -> Self { - self.add_field_to(parent, Arc::new(NestedField::optional(0, name, field_type))) - } - - /// Add an optional column with a doc string under a parent struct. - /// - /// See [`add_column_to`](Self::add_column_to) for parent path details. - pub fn add_column_to_with_doc( - self, - parent: impl ToString, - name: impl ToString, - field_type: Type, - doc: impl ToString, - ) -> Self { - self.add_field_to( - parent, - Arc::new(NestedField::optional(0, name, field_type).with_doc(doc)), - ) - } - - /// Add a required column under a parent struct. - /// - /// See [`add_column_to`](Self::add_column_to) for parent path details. - /// An `initial_default` value is required per the Iceberg spec. - pub fn add_required_column_to( - self, - parent: impl ToString, - name: impl ToString, - field_type: Type, - initial_default: Literal, - ) -> Self { - self.add_field_to( - parent, - Arc::new( - NestedField::required(0, name, field_type) - .with_initial_default(initial_default.clone()) - .with_write_default(initial_default), - ), - ) - } - - /// Add a required column with a doc string under a parent struct. + /// Add a column to the table schema. /// - /// See [`add_column_to`](Self::add_column_to) for parent path details. - /// An `initial_default` value is required per the Iceberg spec. - pub fn add_required_column_to_with_doc( - self, - parent: impl ToString, - name: impl ToString, - field_type: Type, - initial_default: Literal, - doc: impl ToString, - ) -> Self { - self.add_field_to( - parent, - Arc::new( - NestedField::required(0, name, field_type) - .with_initial_default(initial_default.clone()) - .with_write_default(initial_default) - .with_doc(doc), - ), - ) + /// To add a root-level column, leave `AddColumn::parent` as `None`. + /// For nested additions, set a parent path (for example via [`AddColumn::with_parent`]). 
+ /// If the parent resolves to a map/list, the column is added to map value/list element. + pub fn add_column(mut self, add_column: AddColumn) -> Self { + self.additions.push(add_column); + self } // --- Other builder methods --- @@ -243,11 +172,6 @@ impl UpdateSchemaAction { } // --- Internal helpers --- - - fn add_field_internal(mut self, parent: Option, field: NestedFieldRef) -> Self { - self.additions.push(PendingAdd { parent, field }); - self - } } // --------------------------------------------------------------------------- @@ -260,7 +184,7 @@ impl UpdateSchemaAction { /// from `crate::spec::schema::id_reassigner`, but operates on new fields with placeholder /// IDs rather than reassigning an existing schema. `ReassignFieldIds` cannot be used /// directly here because it rejects duplicate old IDs (all new fields share placeholder -/// ID `0`). +/// ID `DEFAULT_ID`). fn assign_fresh_ids(field: &NestedField, next_id: &mut i32) -> NestedFieldRef { *next_id += 1; let new_id = *next_id; @@ -468,40 +392,42 @@ impl TransactionAction for UpdateSchemaAction { // since HashMap iteration order is non-deterministic. let mut additions_by_parent: HashMap> = HashMap::new(); - for pending in &self.additions { + for add in &self.additions { + let pending_field = add.to_nested_field(); + // Check that name does not contain ".". - if pending.field.name.contains('.') { + if pending_field.name.contains('.') { return Err(Error::new( ErrorKind::PreconditionFailed, format!( - "Cannot add column with ambiguous name: {}. Use the `add_column_to` method to add a column to a nested struct.", - pending.field.name + "Cannot add column with ambiguous name: {}. Use `AddColumn::with_parent` to add a column to a nested struct.", + pending_field.name ), )); } // Required columns without an initial default need allow_incompatible_changes. 
- if pending.field.required && pending.field.initial_default.is_none() { + if pending_field.required && pending_field.initial_default.is_none() { return Err(Error::new( ErrorKind::PreconditionFailed, format!( "Incompatible change: cannot add required column without an initial default: {}", - pending.field.name + pending_field.name ), )); } - let parent_id = match &pending.parent { + let parent_id = match &add.parent { None => { // Root-level: check name conflict against root-level fields. - if let Some(existing) = base_schema.field_by_name(&pending.field.name) + if let Some(existing) = base_schema.field_by_name(&pending_field.name) && !delete_ids.contains(&existing.id) { return Err(Error::new( ErrorKind::PreconditionFailed, format!( "Cannot add column, name already exists: {}", - pending.field.name + pending_field.name ), )); } @@ -513,7 +439,7 @@ impl TransactionAction for UpdateSchemaAction { resolve_parent_target(base_schema, parent_path)?; if parent_struct.fields().iter().any(|f| { - f.name == pending.field.name + f.name == pending_field.name && !delete_ids.contains(&f.id) && !delete_ids.contains(&parent_id) }) { @@ -521,7 +447,7 @@ impl TransactionAction for UpdateSchemaAction { ErrorKind::PreconditionFailed, format!( "Cannot add column, name already exists in '{}': {}", - parent_path, pending.field.name + parent_path, pending_field.name ), )); } @@ -532,9 +458,9 @@ impl TransactionAction for UpdateSchemaAction { // Assign fresh IDs immediately, preserving insertion order. 
let field = if self.auto_assign_ids { - assign_fresh_ids(&pending.field, &mut last_column_id) + assign_fresh_ids(&pending_field, &mut last_column_id) } else { - pending.field.clone() + pending_field }; additions_by_parent @@ -582,7 +508,7 @@ mod tests { use crate::transaction::Transaction; use crate::transaction::action::{ApplyTransactionAction, TransactionAction}; use crate::transaction::tests::make_v2_table; - use crate::transaction::update_schema::UpdateSchemaAction; + use crate::transaction::update_schema::{AddColumn, DEFAULT_ID, UpdateSchemaAction}; use crate::{ErrorKind, TableIdent, TableRequirement, TableUpdate}; // The V2 test table has: @@ -696,9 +622,10 @@ mod tests { let table = make_v2_table(); let tx = Transaction::new(&table); - let action = tx - .update_schema() - .add_column("new_col", Type::Primitive(PrimitiveType::Int)); + let action = tx.update_schema().add_column(AddColumn::optional( + "new_col", + Type::Primitive(PrimitiveType::Int), + )); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); let updates = action_commit.take_updates(); @@ -740,10 +667,9 @@ mod tests { let table = make_v2_table(); let tx = Transaction::new(&table); - let action = tx.update_schema().add_column_with_doc( - "documented_col", - Type::Primitive(PrimitiveType::String), - "A documented column", + let action = tx.update_schema().add_column( + AddColumn::optional("documented_col", Type::Primitive(PrimitiveType::String)) + .with_doc("A documented column"), ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); @@ -767,11 +693,11 @@ mod tests { let table = make_v2_table(); let tx = Transaction::new(&table); - let action = tx.update_schema().add_required_column( + let action = tx.update_schema().add_column(AddColumn::required( "req_col", Type::Primitive(PrimitiveType::Int), Literal::int(0), - ); + )); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); let updates = action_commit.take_updates(); @@ -796,9 +722,10 @@ 
mod tests { let tx = Transaction::new(&table); // "x" already exists in the V2 test schema. - let action = tx - .update_schema() - .add_column("x", Type::Primitive(PrimitiveType::Int)); + let action = tx.update_schema().add_column(AddColumn::optional( + "x", + Type::Primitive(PrimitiveType::Int), + )); let result = Arc::new(action).commit(&table).await; let err = match result { @@ -866,7 +793,10 @@ mod tests { let action = tx .update_schema() .delete_column("z") - .add_column("w", Type::Primitive(PrimitiveType::Boolean)); + .add_column(AddColumn::optional( + "w", + Type::Primitive(PrimitiveType::Boolean), + )); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); let updates = action_commit.take_updates(); @@ -894,7 +824,10 @@ mod tests { let action = tx .update_schema() .delete_column("z") - .add_column("z", Type::Primitive(PrimitiveType::Boolean)); + .add_column(AddColumn::optional( + "z", + Type::Primitive(PrimitiveType::Boolean), + )); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); let updates = action_commit.take_updates(); @@ -918,7 +851,10 @@ mod tests { let tx = tx .update_schema() - .add_column("new_col", Type::Primitive(PrimitiveType::Int)) + .add_column(AddColumn::optional( + "new_col", + Type::Primitive(PrimitiveType::Int), + )) .apply(tx) .unwrap(); @@ -938,10 +874,9 @@ mod tests { let tx = Transaction::new(&table); // Add "email" to the "person" struct. 
- let action = tx.update_schema().add_column_to( - "person", - "email", - Type::Primitive(PrimitiveType::String), + let action = tx.update_schema().add_column( + AddColumn::optional("email", Type::Primitive(PrimitiveType::String)) + .with_parent("person"), ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); @@ -970,11 +905,10 @@ mod tests { let table = make_v2_table_with_nested(); let tx = Transaction::new(&table); - let action = tx.update_schema().add_column_to_with_doc( - "person", - "phone", - Type::Primitive(PrimitiveType::String), - "Phone number", + let action = tx.update_schema().add_column( + AddColumn::optional("phone", Type::Primitive(PrimitiveType::String)) + .with_parent("person") + .with_doc("Phone number"), ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); @@ -999,10 +933,9 @@ mod tests { // "tags" is a list. Adding to the list navigates to its // element struct automatically. - let action = tx.update_schema().add_column_to( - "tags", - "score", - Type::Primitive(PrimitiveType::Double), + let action = tx.update_schema().add_column( + AddColumn::optional("score", Type::Primitive(PrimitiveType::Double)) + .with_parent("tags"), ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); @@ -1032,10 +965,9 @@ mod tests { // "props" is a map. Adding to the map navigates to its // value struct automatically. 
- let action = tx.update_schema().add_column_to( - "props", - "version", - Type::Primitive(PrimitiveType::Int), + let action = tx.update_schema().add_column( + AddColumn::optional("version", Type::Primitive(PrimitiveType::Int)) + .with_parent("props"), ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); @@ -1060,10 +992,9 @@ mod tests { let table = make_v2_table_with_nested(); let tx = Transaction::new(&table); - let action = tx.update_schema().add_column_to( - "nonexistent", - "col", - Type::Primitive(PrimitiveType::Int), + let action = tx.update_schema().add_column( + AddColumn::optional("col", Type::Primitive(PrimitiveType::Int)) + .with_parent("nonexistent"), ); let err = match Arc::new(action).commit(&table).await { @@ -1084,9 +1015,9 @@ mod tests { let tx = Transaction::new(&table); // "x" is a primitive (long), not a struct. - let action = - tx.update_schema() - .add_column_to("x", "col", Type::Primitive(PrimitiveType::Int)); + let action = tx.update_schema().add_column( + AddColumn::optional("col", Type::Primitive(PrimitiveType::Int)).with_parent("x"), + ); let err = match Arc::new(action).commit(&table).await { Err(e) => e, @@ -1106,10 +1037,9 @@ mod tests { let tx = Transaction::new(&table); // "name" already exists in the "person" struct. - let action = tx.update_schema().add_column_to( - "person", - "name", - Type::Primitive(PrimitiveType::String), + let action = tx.update_schema().add_column( + AddColumn::optional("name", Type::Primitive(PrimitiveType::String)) + .with_parent("person"), ); let err = match Arc::new(action).commit(&table).await { @@ -1132,8 +1062,14 @@ mod tests { // Add a root column and a nested column in the same action. 
let action = tx .update_schema() - .add_column("root_col", Type::Primitive(PrimitiveType::Boolean)) - .add_column_to("person", "email", Type::Primitive(PrimitiveType::String)); + .add_column(AddColumn::optional( + "root_col", + Type::Primitive(PrimitiveType::Boolean), + )) + .add_column( + AddColumn::optional("email", Type::Primitive(PrimitiveType::String)) + .with_parent("person"), + ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); let updates = action_commit.take_updates(); @@ -1158,19 +1094,20 @@ mod tests { #[tokio::test] async fn test_add_nested_struct_type_with_fresh_ids() { - // Exercises the assign_fresh_ids bug fix: adding a new column whose TYPE - // contains nested fields (e.g. a struct column). All sub-fields must receive - // fresh IDs, not placeholder 0. + // Adding a new column whose TYPE contains nested fields (e.g. a struct column). All sub-fields must receive + // fresh IDs, not placeholder `DEFAULT_ID`. let table = make_v2_table(); let tx = Transaction::new(&table); - let action = tx.update_schema().add_column( + let action = tx.update_schema().add_column(AddColumn::optional( "address", Type::Struct(StructType::new(vec![ - NestedField::optional(0, "street", Type::Primitive(PrimitiveType::String)).into(), - NestedField::optional(0, "city", Type::Primitive(PrimitiveType::String)).into(), + NestedField::optional(DEFAULT_ID, "street", Type::Primitive(PrimitiveType::String)) + .into(), + NestedField::optional(DEFAULT_ID, "city", Type::Primitive(PrimitiveType::String)) + .into(), ])), - ); + )); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); let updates = action_commit.take_updates(); diff --git a/crates/integration_tests/tests/update_schema.rs b/crates/integration_tests/tests/update_schema.rs index 17b106f669..b49a66cc7c 100644 --- a/crates/integration_tests/tests/update_schema.rs +++ b/crates/integration_tests/tests/update_schema.rs @@ -24,7 +24,7 @@ use std::sync::Arc; use arrow_array::{ArrayRef, 
BooleanArray, Int32Array, RecordBatch, StringArray, StructArray}; use common::{random_ns, test_schema}; use futures::TryStreamExt; -use iceberg::transaction::{ApplyTransactionAction, Transaction}; +use iceberg::transaction::{AddColumn, ApplyTransactionAction, Transaction}; use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder; use iceberg::writer::file_writer::ParquetWriterBuilder; use iceberg::writer::file_writer::location_generator::{ @@ -146,10 +146,10 @@ async fn test_add_field() { // Add a new optional primitive field to the table let tx = Transaction::new(&table); - let add_action = tx.update_schema().add_column( + let add_action = tx.update_schema().add_column(AddColumn::optional( "a", iceberg::spec::Type::Primitive(iceberg::spec::PrimitiveType::Int), - ); + )); let tx = add_action.apply(tx).unwrap(); let table = tx.commit(&rest_catalog).await.unwrap(); @@ -168,7 +168,7 @@ async fn test_add_field() { // Add a struct column, then add a nested column inside it let tx = Transaction::new(&table); - let add_action = tx.update_schema().add_column( + let add_action = tx.update_schema().add_column(AddColumn::optional( "info", iceberg::spec::Type::Struct(iceberg::spec::StructType::new(vec![Arc::new( iceberg::spec::NestedField::optional( @@ -177,7 +177,7 @@ async fn test_add_field() { iceberg::spec::Type::Primitive(iceberg::spec::PrimitiveType::String), ), )])), - ); + )); let tx = add_action.apply(tx).unwrap(); let table = tx.commit(&rest_catalog).await.unwrap(); @@ -200,10 +200,12 @@ async fn test_add_field() { // Add a nested column to the struct let tx = Transaction::new(&table); - let add_action = tx.update_schema().add_column_to( - "info", - "zip", - iceberg::spec::Type::Primitive(iceberg::spec::PrimitiveType::String), + let add_action = tx.update_schema().add_column( + AddColumn::optional( + "zip", + iceberg::spec::Type::Primitive(iceberg::spec::PrimitiveType::String), + ) + .with_parent("info"), ); let tx = add_action.apply(tx).unwrap(); 
let table = tx.commit(&rest_catalog).await.unwrap(); From c3aa21bfd9ca9fc01a12c5fa599753ad442c37fc Mon Sep 17 00:00:00 2001 From: Victor Ghita Date: Fri, 20 Mar 2026 14:57:48 +0200 Subject: [PATCH 09/15] implement nits --- crates/iceberg/src/spec/schema/mod.rs | 2 ++ .../iceberg/src/transaction/update_schema.rs | 23 ++++--------------- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/crates/iceberg/src/spec/schema/mod.rs b/crates/iceberg/src/spec/schema/mod.rs index 13ad41818b..9109990e19 100644 --- a/crates/iceberg/src/spec/schema/mod.rs +++ b/crates/iceberg/src/spec/schema/mod.rs @@ -51,6 +51,8 @@ pub type SchemaId = i32; pub type SchemaRef = Arc; /// Default schema id. pub const DEFAULT_SCHEMA_ID: SchemaId = 0; +/// Delimiter for schema name, which denotes a nested struct. +pub const SCHEMA_NAME_DELIMITER: &str = "."; /// Defines schema in iceberg. #[derive(Debug, Serialize, Deserialize, Clone)] diff --git a/crates/iceberg/src/transaction/update_schema.rs b/crates/iceberg/src/transaction/update_schema.rs index 5d0b435d8d..5f82c53897 100644 --- a/crates/iceberg/src/transaction/update_schema.rs +++ b/crates/iceberg/src/transaction/update_schema.rs @@ -22,7 +22,8 @@ use async_trait::async_trait; use typed_builder::TypedBuilder; use crate::spec::{ - ListType, Literal, MapType, NestedField, NestedFieldRef, Schema, StructType, Type, + ListType, Literal, MapType, NestedField, NestedFieldRef, SCHEMA_NAME_DELIMITER, Schema, + StructType, Type, }; use crate::table::Table; use crate::transaction::action::{ActionCommit, TransactionAction}; @@ -33,13 +34,13 @@ const TABLE_ROOT_ID: i32 = -1; // Default ID for a new column. This will be re-assigned to a fresh ID at commit time. const DEFAULT_ID: i32 = 0; -#[derive(TypedBuilder)] /// Declarative specification for adding a column in [`UpdateSchemaAction`]. 
/// /// Use helper constructors such as [`AddColumn::optional`] and [`AddColumn::required`], /// optionally combined with [`AddColumn::with_parent`] and [`AddColumn::with_doc`], then pass /// the value to /// [`UpdateSchemaAction::add_column`]. +#[derive(TypedBuilder)] pub struct AddColumn { #[builder(default = None, setter(strip_option, into))] parent: Option, @@ -77,18 +78,6 @@ impl AddColumn { .build() } - /// Return a copy with an updated parent path. - pub fn with_parent(mut self, parent: impl ToString) -> Self { - self.parent = Some(parent.to_string()); - self - } - - /// Return a copy with an updated doc string. - pub fn with_doc(mut self, doc: impl ToString) -> Self { - self.doc = Some(doc.to_string()); - self - } - fn to_nested_field(&self) -> NestedFieldRef { let mut field = NestedField::new( DEFAULT_ID, @@ -170,8 +159,6 @@ impl UpdateSchemaAction { self.auto_assign_ids = false; self } - - // --- Internal helpers --- } // --------------------------------------------------------------------------- @@ -395,8 +382,8 @@ impl TransactionAction for UpdateSchemaAction { for add in &self.additions { let pending_field = add.to_nested_field(); - // Check that name does not contain ".". - if pending_field.name.contains('.') { + // Check that name does not contain `SCHEMA_NAME_DELIMITER`. 
+ if pending_field.name.contains(SCHEMA_NAME_DELIMITER) { return Err(Error::new( ErrorKind::PreconditionFailed, format!( From d6804627dff4267215fea407f7b90dbeae70332d Mon Sep 17 00:00:00 2001 From: Victor Ghita Date: Fri, 20 Mar 2026 15:06:09 +0200 Subject: [PATCH 10/15] Update unit tests --- .../iceberg/src/transaction/update_schema.rs | 137 ++++++++++++------ 1 file changed, 89 insertions(+), 48 deletions(-) diff --git a/crates/iceberg/src/transaction/update_schema.rs b/crates/iceberg/src/transaction/update_schema.rs index 5f82c53897..170a2531d7 100644 --- a/crates/iceberg/src/transaction/update_schema.rs +++ b/crates/iceberg/src/transaction/update_schema.rs @@ -490,7 +490,7 @@ mod tests { use as_any::Downcast; - use crate::spec::{Literal, NestedField, PrimitiveType, StructType, TableMetadata, Type}; + use crate::spec::{Literal, NestedField, PrimitiveType, Schema, StructType, TableMetadata, Type}; use crate::table::Table; use crate::transaction::Transaction; use crate::transaction::action::{ApplyTransactionAction, TransactionAction}; @@ -626,19 +626,16 @@ mod tests { other => panic!("expected AddSchema, got {other:?}"), }; - // The new field should have ID = last_column_id + 1 = 4. - let new_field = new_schema - .field_by_name("new_col") - .expect("new_col should exist"); - assert_eq!(new_field.id, 4); - assert!(!new_field.required); - assert_eq!(*new_field.field_type, Type::Primitive(PrimitiveType::Int)); - assert!(new_field.doc.is_none()); - - // Original fields should still be there. 
- assert!(new_schema.field_by_name("x").is_some()); - assert!(new_schema.field_by_name("y").is_some()); - assert!(new_schema.field_by_name("z").is_some()); + let mut expected_fields = table.metadata().current_schema().as_struct().fields().to_vec(); + expected_fields.push( + NestedField::optional(4, "new_col", Type::Primitive(PrimitiveType::Int)).into(), + ); + let expected_schema = Schema::builder() + .with_fields(expected_fields) + .with_identifier_field_ids(table.metadata().current_schema().identifier_field_ids()) + .build() + .unwrap(); + assert_eq!(new_schema, &expected_schema); assert_eq!(updates[1], TableUpdate::SetCurrentSchema { schema_id: -1 }); @@ -654,10 +651,15 @@ mod tests { let table = make_v2_table(); let tx = Transaction::new(&table); - let action = tx.update_schema().add_column( - AddColumn::optional("documented_col", Type::Primitive(PrimitiveType::String)) - .with_doc("A documented column"), - ); + let action = tx + .update_schema() + .add_column( + AddColumn::builder() + .name("documented_col") + .field_type(Type::Primitive(PrimitiveType::String)) + .doc("A documented column") + .build(), + ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); let updates = action_commit.take_updates(); @@ -861,10 +863,15 @@ mod tests { let tx = Transaction::new(&table); // Add "email" to the "person" struct. 
- let action = tx.update_schema().add_column( - AddColumn::optional("email", Type::Primitive(PrimitiveType::String)) - .with_parent("person"), - ); + let action = tx + .update_schema() + .add_column( + AddColumn::builder() + .name("email") + .field_type(Type::Primitive(PrimitiveType::String)) + .parent("person") + .build(), + ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); let updates = action_commit.take_updates(); @@ -892,11 +899,16 @@ mod tests { let table = make_v2_table_with_nested(); let tx = Transaction::new(&table); - let action = tx.update_schema().add_column( - AddColumn::optional("phone", Type::Primitive(PrimitiveType::String)) - .with_parent("person") - .with_doc("Phone number"), - ); + let action = tx + .update_schema() + .add_column( + AddColumn::builder() + .name("phone") + .field_type(Type::Primitive(PrimitiveType::String)) + .parent("person") + .doc("Phone number") + .build(), + ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); let updates = action_commit.take_updates(); @@ -920,10 +932,15 @@ mod tests { // "tags" is a list. Adding to the list navigates to its // element struct automatically. - let action = tx.update_schema().add_column( - AddColumn::optional("score", Type::Primitive(PrimitiveType::Double)) - .with_parent("tags"), - ); + let action = tx + .update_schema() + .add_column( + AddColumn::builder() + .name("score") + .field_type(Type::Primitive(PrimitiveType::Double)) + .parent("tags") + .build(), + ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); let updates = action_commit.take_updates(); @@ -952,10 +969,15 @@ mod tests { // "props" is a map. Adding to the map navigates to its // value struct automatically. 
- let action = tx.update_schema().add_column( - AddColumn::optional("version", Type::Primitive(PrimitiveType::Int)) - .with_parent("props"), - ); + let action = tx + .update_schema() + .add_column( + AddColumn::builder() + .name("version") + .field_type(Type::Primitive(PrimitiveType::Int)) + .parent("props") + .build(), + ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); let updates = action_commit.take_updates(); @@ -979,10 +1001,15 @@ mod tests { let table = make_v2_table_with_nested(); let tx = Transaction::new(&table); - let action = tx.update_schema().add_column( - AddColumn::optional("col", Type::Primitive(PrimitiveType::Int)) - .with_parent("nonexistent"), - ); + let action = tx + .update_schema() + .add_column( + AddColumn::builder() + .name("col") + .field_type(Type::Primitive(PrimitiveType::Int)) + .parent("nonexistent") + .build(), + ); let err = match Arc::new(action).commit(&table).await { Err(e) => e, @@ -1002,9 +1029,15 @@ mod tests { let tx = Transaction::new(&table); // "x" is a primitive (long), not a struct. - let action = tx.update_schema().add_column( - AddColumn::optional("col", Type::Primitive(PrimitiveType::Int)).with_parent("x"), - ); + let action = tx + .update_schema() + .add_column( + AddColumn::builder() + .name("col") + .field_type(Type::Primitive(PrimitiveType::Int)) + .parent("x") + .build(), + ); let err = match Arc::new(action).commit(&table).await { Err(e) => e, @@ -1024,10 +1057,15 @@ mod tests { let tx = Transaction::new(&table); // "name" already exists in the "person" struct. 
- let action = tx.update_schema().add_column( - AddColumn::optional("name", Type::Primitive(PrimitiveType::String)) - .with_parent("person"), - ); + let action = tx + .update_schema() + .add_column( + AddColumn::builder() + .name("name") + .field_type(Type::Primitive(PrimitiveType::String)) + .parent("person") + .build(), + ); let err = match Arc::new(action).commit(&table).await { Err(e) => e, @@ -1054,8 +1092,11 @@ mod tests { Type::Primitive(PrimitiveType::Boolean), )) .add_column( - AddColumn::optional("email", Type::Primitive(PrimitiveType::String)) - .with_parent("person"), + AddColumn::builder() + .name("email") + .field_type(Type::Primitive(PrimitiveType::String)) + .parent("person") + .build(), ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); From d134608eb4a66f897fcd68b8aaa5592542d277e4 Mon Sep 17 00:00:00 2001 From: Victor Ghita Date: Fri, 20 Mar 2026 15:15:09 +0200 Subject: [PATCH 11/15] Move integration tests --- .../loader/tests/loader_catalog_test.rs | 165 +++++++ .../integration_tests/tests/update_schema.rs | 422 ------------------ 2 files changed, 165 insertions(+), 422 deletions(-) create mode 100644 crates/catalog/loader/tests/loader_catalog_test.rs delete mode 100644 crates/integration_tests/tests/update_schema.rs diff --git a/crates/catalog/loader/tests/loader_catalog_test.rs b/crates/catalog/loader/tests/loader_catalog_test.rs new file mode 100644 index 0000000000..92a85e91ca --- /dev/null +++ b/crates/catalog/loader/tests/loader_catalog_test.rs @@ -0,0 +1,165 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Catalog tests for schema evolution with `MemoryCatalog`. + +use std::collections::HashMap; + +use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder}; +use iceberg::spec::{NestedField, PrimitiveType, Schema, Type}; +use iceberg::transaction::{AddColumn, ApplyTransactionAction, Transaction}; +use iceberg::{Catalog, CatalogBuilder, ErrorKind, NamespaceIdent, TableCreation, TableIdent}; +use tempfile::TempDir; + +fn base_schema() -> Schema { + Schema::builder() + .with_fields(vec![ + NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(), + NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(), + ]) + .with_identifier_field_ids(vec![2]) + .build() + .unwrap() +} + +async fn new_catalog() -> (iceberg::MemoryCatalog, TempDir) { + let warehouse = TempDir::new().unwrap(); + let catalog = MemoryCatalogBuilder::default() + .load( + "memory", + HashMap::from([( + MEMORY_CATALOG_WAREHOUSE.to_string(), + warehouse.path().to_string_lossy().to_string(), + )]), + ) + .await + .unwrap(); + + (catalog, warehouse) +} + +async fn create_table(catalog: &iceberg::MemoryCatalog, table_name: &str) -> TableIdent { + let ns = NamespaceIdent::new("schema_evolution".to_string()); + if catalog.get_namespace(&ns).await.is_err() { + catalog.create_namespace(&ns, HashMap::new()).await.unwrap(); + } + + let table_ident = TableIdent::new(ns.clone(), table_name.to_string()); + let _ = catalog.drop_table(&table_ident).await; + + 
catalog + .create_table( + &ns, + TableCreation::builder() + .name(table_name.to_string()) + .schema(base_schema()) + .build(), + ) + .await + .unwrap(); + + table_ident +} + +#[tokio::test] +async fn test_add_field_with_memory_catalog() { + let (catalog, _warehouse) = new_catalog().await; + let table_ident = create_table(&catalog, "t_add_field").await; + let table = catalog.load_table(&table_ident).await.unwrap(); + + let tx = Transaction::new(&table); + let tx = tx + .update_schema() + .add_column(AddColumn::optional("a", Type::Primitive(PrimitiveType::Int))) + .apply(tx) + .unwrap(); + + let updated_table = tx.commit(&catalog).await.unwrap(); + let schema = updated_table.metadata().current_schema(); + + let field_a = schema.field_by_name("a").expect("a should exist"); + assert_eq!(field_a.id, 4); + assert_eq!(*field_a.field_type, Type::Primitive(PrimitiveType::Int)); +} + +#[tokio::test] +async fn test_add_nested_and_delete_field_with_memory_catalog() { + let (catalog, _warehouse) = new_catalog().await; + let table_ident = create_table(&catalog, "t_add_nested_delete").await; + let table = catalog.load_table(&table_ident).await.unwrap(); + + let tx = Transaction::new(&table); + let tx = tx + .update_schema() + .add_column(AddColumn::optional( + "info", + Type::Struct(iceberg::spec::StructType::new(vec![ + NestedField::optional( + 0, + "city", + Type::Primitive(PrimitiveType::String), + ) + .into(), + ])), + )) + .apply(tx) + .unwrap(); + let table = tx.commit(&catalog).await.unwrap(); + + let tx = Transaction::new(&table); + let tx = tx + .update_schema() + .add_column( + AddColumn::builder() + .name("zip") + .field_type(Type::Primitive(PrimitiveType::String)) + .parent("info") + .build(), + ) + .delete_column("baz") + .apply(tx) + .unwrap(); + let table = tx.commit(&catalog).await.unwrap(); + + let schema = table.metadata().current_schema(); + assert!(schema.field_by_name("info").is_some()); + assert!(schema.field_by_name("info.city").is_some()); + 
assert!(schema.field_by_name("info.zip").is_some()); + assert!(schema.field_by_name("baz").is_none()); +} + +#[tokio::test] +async fn test_delete_identifier_and_missing_field_fail_with_memory_catalog() { + let (catalog, _warehouse) = new_catalog().await; + let table_ident = create_table(&catalog, "t_delete_failures").await; + let table = catalog.load_table(&table_ident).await.unwrap(); + + let tx = Transaction::new(&table); + let tx = tx.update_schema().delete_column("bar").apply(tx).unwrap(); + let err = tx.commit(&catalog).await.unwrap_err(); + assert_eq!(err.kind(), ErrorKind::PreconditionFailed); + + let tx = Transaction::new(&table); + let tx = tx + .update_schema() + .delete_column("nonexistent") + .apply(tx) + .unwrap(); + let err = tx.commit(&catalog).await.unwrap_err(); + assert_eq!(err.kind(), ErrorKind::PreconditionFailed); +} diff --git a/crates/integration_tests/tests/update_schema.rs b/crates/integration_tests/tests/update_schema.rs deleted file mode 100644 index b49a66cc7c..0000000000 --- a/crates/integration_tests/tests/update_schema.rs +++ /dev/null @@ -1,422 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Integration tests for the `UpdateSchemaAction`. 
- -mod common; - -use std::sync::Arc; - -use arrow_array::{ArrayRef, BooleanArray, Int32Array, RecordBatch, StringArray, StructArray}; -use common::{random_ns, test_schema}; -use futures::TryStreamExt; -use iceberg::transaction::{AddColumn, ApplyTransactionAction, Transaction}; -use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder; -use iceberg::writer::file_writer::ParquetWriterBuilder; -use iceberg::writer::file_writer::location_generator::{ - DefaultFileNameGenerator, DefaultLocationGenerator, -}; -use iceberg::writer::file_writer::rolling_writer::RollingFileWriterBuilder; -use iceberg::writer::{IcebergWriter, IcebergWriterBuilder}; -use iceberg::{Catalog, CatalogBuilder, TableCreation}; -use iceberg_catalog_rest::RestCatalogBuilder; -use iceberg_integration_tests::get_test_fixture; -use iceberg_storage_opendal::OpenDalStorageFactory; -use parquet::arrow::arrow_reader::ArrowReaderOptions; -use parquet::file::properties::WriterProperties; - -/// Creates a table, appends data, adds a new field to the schema, -/// verifies existing data is still readable, then appends data with the new schema. 
-#[tokio::test] -async fn test_add_field() { - let fixture = get_test_fixture(); - let rest_catalog = RestCatalogBuilder::default() - .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 { - configured_scheme: "s3".to_string(), - customized_credential_load: None, - })) - .load("rest", fixture.catalog_config.clone()) - .await - .unwrap(); - let ns = random_ns().await; - let schema = test_schema(); - - let table_creation = TableCreation::builder() - .name("t1".to_string()) - .schema(schema.clone()) - .build(); - - let table = rest_catalog - .create_table(ns.name(), table_creation) - .await - .unwrap(); - - // Create the writer and write initial data - let arrow_schema: Arc = Arc::new( - table - .metadata() - .current_schema() - .as_ref() - .try_into() - .unwrap(), - ); - let location_generator = DefaultLocationGenerator::new(table.metadata().clone()).unwrap(); - let file_name_generator = DefaultFileNameGenerator::new( - "test".to_string(), - None, - iceberg::spec::DataFileFormat::Parquet, - ); - let parquet_writer_builder = ParquetWriterBuilder::new( - WriterProperties::default(), - table.metadata().current_schema().clone(), - ); - let rolling_file_writer_builder = RollingFileWriterBuilder::new_with_default_file_size( - parquet_writer_builder, - table.file_io().clone(), - location_generator.clone(), - file_name_generator.clone(), - ); - let data_file_writer_builder = DataFileWriterBuilder::new(rolling_file_writer_builder); - let mut data_file_writer = data_file_writer_builder.build(None).await.unwrap(); - let col1 = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]); - let col2 = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4)]); - let col3 = BooleanArray::from(vec![Some(true), Some(false), None, Some(false)]); - let batch = RecordBatch::try_new(arrow_schema.clone(), vec![ - Arc::new(col1) as ArrayRef, - Arc::new(col2) as ArrayRef, - Arc::new(col3) as ArrayRef, - ]) - .unwrap(); - data_file_writer.write(batch.clone()).await.unwrap(); - 
let data_file = data_file_writer.close().await.unwrap(); - - // Check parquet file schema has the expected field IDs - let content = table - .file_io() - .new_input(data_file[0].file_path()) - .unwrap() - .read() - .await - .unwrap(); - let parquet_reader = parquet::arrow::arrow_reader::ArrowReaderMetadata::load( - &content, - ArrowReaderOptions::default(), - ) - .unwrap(); - let field_ids: Vec = parquet_reader - .parquet_schema() - .columns() - .iter() - .map(|col| col.self_type().get_basic_info().id()) - .collect(); - assert_eq!(field_ids, vec![1, 2, 3]); - - // Commit the initial data - let tx = Transaction::new(&table); - let append_action = tx.fast_append().add_data_files(data_file.clone()); - let tx = append_action.apply(tx).unwrap(); - let table = tx.commit(&rest_catalog).await.unwrap(); - - // Verify the initial data is readable - let batch_stream = table - .scan() - .select_all() - .build() - .unwrap() - .to_arrow() - .await - .unwrap(); - let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); - assert_eq!(batches.len(), 1); - assert_eq!(batches[0], batch); - - // Add a new optional primitive field to the table - let tx = Transaction::new(&table); - let add_action = tx.update_schema().add_column(AddColumn::optional( - "a", - iceberg::spec::Type::Primitive(iceberg::spec::PrimitiveType::Int), - )); - let tx = add_action.apply(tx).unwrap(); - let table = tx.commit(&rest_catalog).await.unwrap(); - - // Verify existing data is still readable after schema evolution - let batch_stream = table - .scan() - .select_all() - .build() - .unwrap() - .to_arrow() - .await - .unwrap(); - let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); - assert_eq!(batches.len(), 1); - assert_eq!(batches[0], batch); - - // Add a struct column, then add a nested column inside it - let tx = Transaction::new(&table); - let add_action = tx.update_schema().add_column(AddColumn::optional( - "info", - 
iceberg::spec::Type::Struct(iceberg::spec::StructType::new(vec![Arc::new( - iceberg::spec::NestedField::optional( - 0, - "city", - iceberg::spec::Type::Primitive(iceberg::spec::PrimitiveType::String), - ), - )])), - )); - let tx = add_action.apply(tx).unwrap(); - let table = tx.commit(&rest_catalog).await.unwrap(); - - // Verify the struct column was added - let schema = table.metadata().current_schema(); - let info_field = schema - .field_by_name("info") - .expect("info field should exist"); - assert!(matches!( - info_field.field_type.as_ref(), - iceberg::spec::Type::Struct(_) - )); - let city_field = schema - .field_by_name("info.city") - .expect("info.city field should exist"); - assert!(matches!( - city_field.field_type.as_ref(), - iceberg::spec::Type::Primitive(iceberg::spec::PrimitiveType::String) - )); - - // Add a nested column to the struct - let tx = Transaction::new(&table); - let add_action = tx.update_schema().add_column( - AddColumn::optional( - "zip", - iceberg::spec::Type::Primitive(iceberg::spec::PrimitiveType::String), - ) - .with_parent("info"), - ); - let tx = add_action.apply(tx).unwrap(); - let table = tx.commit(&rest_catalog).await.unwrap(); - - // Verify the nested column was added - let schema = table.metadata().current_schema(); - let zip_field = schema - .field_by_name("info.zip") - .expect("info.zip field should exist"); - assert!(matches!( - zip_field.field_type.as_ref(), - iceberg::spec::Type::Primitive(iceberg::spec::PrimitiveType::String) - )); - - // Verify existing data is still readable - let batch_stream = table - .scan() - .select_all() - .build() - .unwrap() - .to_arrow() - .await - .unwrap(); - let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); - assert_eq!(batches.len(), 1); - - // Create a new writer with the evolved schema and write data including the new field - let parquet_writer_builder = ParquetWriterBuilder::new( - WriterProperties::default(), - table.metadata().current_schema().clone(), - ); - let 
rolling_file_writer_builder = RollingFileWriterBuilder::new_with_default_file_size( - parquet_writer_builder, - table.file_io().clone(), - location_generator.clone(), - file_name_generator.clone(), - ); - let data_file_writer_builder = DataFileWriterBuilder::new(rolling_file_writer_builder); - let mut data_file_writer = data_file_writer_builder.build(None).await.unwrap(); - let col1 = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]); - let col2 = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4)]); - let col3 = BooleanArray::from(vec![Some(true), Some(false), None, Some(false)]); - let col4 = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4)]); - let evolved_arrow_schema: Arc = Arc::new( - table - .metadata() - .current_schema() - .as_ref() - .try_into() - .unwrap(), - ); - // Build a struct array for the "info" column: {city, zip} - let city_array = StringArray::from(vec![Some("NYC"), Some("LA"), None, Some("SF")]); - let zip_array = StringArray::from(vec![Some("10001"), None, Some("90001"), Some("94101")]); - let info_fields = evolved_arrow_schema - .field_with_name("info") - .unwrap() - .data_type() - .clone(); - let struct_fields = match &info_fields { - arrow_schema::DataType::Struct(fields) => fields.clone(), - _ => panic!("expected struct type for info"), - }; - let info_array = StructArray::try_new( - struct_fields, - vec![ - Arc::new(city_array) as ArrayRef, - Arc::new(zip_array) as ArrayRef, - ], - None, - ) - .unwrap(); - let batch_with_new_field = RecordBatch::try_new(evolved_arrow_schema.clone(), vec![ - Arc::new(col1) as ArrayRef, - Arc::new(col2) as ArrayRef, - Arc::new(col3) as ArrayRef, - Arc::new(col4) as ArrayRef, - Arc::new(info_array) as ArrayRef, - ]) - .unwrap(); - data_file_writer - .write(batch_with_new_field.clone()) - .await - .unwrap(); - let data_file = data_file_writer.close().await.unwrap(); - - // Commit the new data with evolved schema - let tx = Transaction::new(&table); - let append_action = 
tx.fast_append().add_data_files(data_file.clone()); - let tx = append_action.apply(tx).unwrap(); - let _table = tx.commit(&rest_catalog).await.unwrap(); -} - -/// Creates a table, adds data, deletes a non-identifier column, -/// and verifies the schema was updated and existing data is still readable. -#[tokio::test] -async fn test_delete_field() { - let fixture = get_test_fixture(); - let rest_catalog = RestCatalogBuilder::default() - .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 { - configured_scheme: "s3".to_string(), - customized_credential_load: None, - })) - .load("rest", fixture.catalog_config.clone()) - .await - .unwrap(); - let ns = random_ns().await; - let schema = test_schema(); - - let table_creation = TableCreation::builder() - .name("t_delete".to_string()) - .schema(schema.clone()) - .build(); - - let table = rest_catalog - .create_table(ns.name(), table_creation) - .await - .unwrap(); - - // Write initial data with all three columns - let arrow_schema: Arc = Arc::new( - table - .metadata() - .current_schema() - .as_ref() - .try_into() - .unwrap(), - ); - let location_generator = DefaultLocationGenerator::new(table.metadata().clone()).unwrap(); - let file_name_generator = DefaultFileNameGenerator::new( - "test".to_string(), - None, - iceberg::spec::DataFileFormat::Parquet, - ); - let parquet_writer_builder = ParquetWriterBuilder::new( - WriterProperties::default(), - table.metadata().current_schema().clone(), - ); - let rolling_file_writer_builder = RollingFileWriterBuilder::new_with_default_file_size( - parquet_writer_builder, - table.file_io().clone(), - location_generator.clone(), - file_name_generator.clone(), - ); - let data_file_writer_builder = DataFileWriterBuilder::new(rolling_file_writer_builder); - let mut data_file_writer = data_file_writer_builder.build(None).await.unwrap(); - let col1 = StringArray::from(vec![Some("foo"), Some("bar")]); - let col2 = Int32Array::from(vec![Some(1), Some(2)]); - let col3 = 
BooleanArray::from(vec![Some(true), Some(false)]); - let batch = RecordBatch::try_new(arrow_schema.clone(), vec![ - Arc::new(col1) as ArrayRef, - Arc::new(col2) as ArrayRef, - Arc::new(col3) as ArrayRef, - ]) - .unwrap(); - data_file_writer.write(batch.clone()).await.unwrap(); - let data_file = data_file_writer.close().await.unwrap(); - - // Commit the initial data - let tx = Transaction::new(&table); - let append_action = tx.fast_append().add_data_files(data_file.clone()); - let tx = append_action.apply(tx).unwrap(); - let table = tx.commit(&rest_catalog).await.unwrap(); - - // Delete the optional "baz" column (field 3, not an identifier) - let tx = Transaction::new(&table); - let delete_action = tx.update_schema().delete_column("baz"); - let tx = delete_action.apply(tx).unwrap(); - let table = tx.commit(&rest_catalog).await.unwrap(); - - // Verify the schema no longer contains "baz" - let schema = table.metadata().current_schema(); - assert!( - schema.field_by_name("baz").is_none(), - "baz should have been deleted" - ); - assert!( - schema.field_by_name("foo").is_some(), - "foo should still exist" - ); - assert!( - schema.field_by_name("bar").is_some(), - "bar should still exist" - ); - - // Verify existing data is still readable after the column deletion - let batch_stream = table - .scan() - .select_all() - .build() - .unwrap() - .to_arrow() - .await - .unwrap(); - let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); - assert_eq!(batches.len(), 1); - - // Deleting an identifier field should fail - let tx = Transaction::new(&table); - let delete_action = tx.update_schema().delete_column("bar"); - let tx = delete_action.apply(tx).unwrap(); - let result = tx.commit(&rest_catalog).await; - assert!(result.is_err(), "deleting an identifier field should fail"); - - // Deleting a non-existent column should fail - let tx = Transaction::new(&table); - let delete_action = tx.update_schema().delete_column("nonexistent"); - let tx = 
delete_action.apply(tx).unwrap(); - let result = tx.commit(&rest_catalog).await; - assert!( - result.is_err(), - "deleting a non-existent column should fail" - ); -} From 7bf9ddab0bbd0f2434793721094dba7731fbb317 Mon Sep 17 00:00:00 2001 From: Victor Ghita Date: Fri, 20 Mar 2026 15:18:16 +0200 Subject: [PATCH 12/15] run formatter --- .../loader/tests/loader_catalog_test.rs | 12 +- .../iceberg/src/transaction/update_schema.rs | 146 ++++++++---------- 2 files changed, 73 insertions(+), 85 deletions(-) diff --git a/crates/catalog/loader/tests/loader_catalog_test.rs b/crates/catalog/loader/tests/loader_catalog_test.rs index 92a85e91ca..2dcaa4bd9e 100644 --- a/crates/catalog/loader/tests/loader_catalog_test.rs +++ b/crates/catalog/loader/tests/loader_catalog_test.rs @@ -85,7 +85,10 @@ async fn test_add_field_with_memory_catalog() { let tx = Transaction::new(&table); let tx = tx .update_schema() - .add_column(AddColumn::optional("a", Type::Primitive(PrimitiveType::Int))) + .add_column(AddColumn::optional( + "a", + Type::Primitive(PrimitiveType::Int), + )) .apply(tx) .unwrap(); @@ -109,12 +112,7 @@ async fn test_add_nested_and_delete_field_with_memory_catalog() { .add_column(AddColumn::optional( "info", Type::Struct(iceberg::spec::StructType::new(vec![ - NestedField::optional( - 0, - "city", - Type::Primitive(PrimitiveType::String), - ) - .into(), + NestedField::optional(0, "city", Type::Primitive(PrimitiveType::String)).into(), ])), )) .apply(tx) diff --git a/crates/iceberg/src/transaction/update_schema.rs b/crates/iceberg/src/transaction/update_schema.rs index 170a2531d7..7f81b43de8 100644 --- a/crates/iceberg/src/transaction/update_schema.rs +++ b/crates/iceberg/src/transaction/update_schema.rs @@ -490,7 +490,9 @@ mod tests { use as_any::Downcast; - use crate::spec::{Literal, NestedField, PrimitiveType, Schema, StructType, TableMetadata, Type}; + use crate::spec::{ + Literal, NestedField, PrimitiveType, Schema, StructType, TableMetadata, Type, + }; use 
crate::table::Table; use crate::transaction::Transaction; use crate::transaction::action::{ApplyTransactionAction, TransactionAction}; @@ -626,10 +628,14 @@ mod tests { other => panic!("expected AddSchema, got {other:?}"), }; - let mut expected_fields = table.metadata().current_schema().as_struct().fields().to_vec(); - expected_fields.push( - NestedField::optional(4, "new_col", Type::Primitive(PrimitiveType::Int)).into(), - ); + let mut expected_fields = table + .metadata() + .current_schema() + .as_struct() + .fields() + .to_vec(); + expected_fields + .push(NestedField::optional(4, "new_col", Type::Primitive(PrimitiveType::Int)).into()); let expected_schema = Schema::builder() .with_fields(expected_fields) .with_identifier_field_ids(table.metadata().current_schema().identifier_field_ids()) @@ -651,15 +657,13 @@ mod tests { let table = make_v2_table(); let tx = Transaction::new(&table); - let action = tx - .update_schema() - .add_column( - AddColumn::builder() - .name("documented_col") - .field_type(Type::Primitive(PrimitiveType::String)) - .doc("A documented column") - .build(), - ); + let action = tx.update_schema().add_column( + AddColumn::builder() + .name("documented_col") + .field_type(Type::Primitive(PrimitiveType::String)) + .doc("A documented column") + .build(), + ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); let updates = action_commit.take_updates(); @@ -863,15 +867,13 @@ mod tests { let tx = Transaction::new(&table); // Add "email" to the "person" struct. 
- let action = tx - .update_schema() - .add_column( - AddColumn::builder() - .name("email") - .field_type(Type::Primitive(PrimitiveType::String)) - .parent("person") - .build(), - ); + let action = tx.update_schema().add_column( + AddColumn::builder() + .name("email") + .field_type(Type::Primitive(PrimitiveType::String)) + .parent("person") + .build(), + ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); let updates = action_commit.take_updates(); @@ -899,16 +901,14 @@ mod tests { let table = make_v2_table_with_nested(); let tx = Transaction::new(&table); - let action = tx - .update_schema() - .add_column( - AddColumn::builder() - .name("phone") - .field_type(Type::Primitive(PrimitiveType::String)) - .parent("person") - .doc("Phone number") - .build(), - ); + let action = tx.update_schema().add_column( + AddColumn::builder() + .name("phone") + .field_type(Type::Primitive(PrimitiveType::String)) + .parent("person") + .doc("Phone number") + .build(), + ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); let updates = action_commit.take_updates(); @@ -932,15 +932,13 @@ mod tests { // "tags" is a list. Adding to the list navigates to its // element struct automatically. - let action = tx - .update_schema() - .add_column( - AddColumn::builder() - .name("score") - .field_type(Type::Primitive(PrimitiveType::Double)) - .parent("tags") - .build(), - ); + let action = tx.update_schema().add_column( + AddColumn::builder() + .name("score") + .field_type(Type::Primitive(PrimitiveType::Double)) + .parent("tags") + .build(), + ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); let updates = action_commit.take_updates(); @@ -969,15 +967,13 @@ mod tests { // "props" is a map. Adding to the map navigates to its // value struct automatically. 
- let action = tx - .update_schema() - .add_column( - AddColumn::builder() - .name("version") - .field_type(Type::Primitive(PrimitiveType::Int)) - .parent("props") - .build(), - ); + let action = tx.update_schema().add_column( + AddColumn::builder() + .name("version") + .field_type(Type::Primitive(PrimitiveType::Int)) + .parent("props") + .build(), + ); let mut action_commit = Arc::new(action).commit(&table).await.unwrap(); let updates = action_commit.take_updates(); @@ -1001,15 +997,13 @@ mod tests { let table = make_v2_table_with_nested(); let tx = Transaction::new(&table); - let action = tx - .update_schema() - .add_column( - AddColumn::builder() - .name("col") - .field_type(Type::Primitive(PrimitiveType::Int)) - .parent("nonexistent") - .build(), - ); + let action = tx.update_schema().add_column( + AddColumn::builder() + .name("col") + .field_type(Type::Primitive(PrimitiveType::Int)) + .parent("nonexistent") + .build(), + ); let err = match Arc::new(action).commit(&table).await { Err(e) => e, @@ -1029,15 +1023,13 @@ mod tests { let tx = Transaction::new(&table); // "x" is a primitive (long), not a struct. - let action = tx - .update_schema() - .add_column( - AddColumn::builder() - .name("col") - .field_type(Type::Primitive(PrimitiveType::Int)) - .parent("x") - .build(), - ); + let action = tx.update_schema().add_column( + AddColumn::builder() + .name("col") + .field_type(Type::Primitive(PrimitiveType::Int)) + .parent("x") + .build(), + ); let err = match Arc::new(action).commit(&table).await { Err(e) => e, @@ -1057,15 +1049,13 @@ mod tests { let tx = Transaction::new(&table); // "name" already exists in the "person" struct. 
- let action = tx - .update_schema() - .add_column( - AddColumn::builder() - .name("name") - .field_type(Type::Primitive(PrimitiveType::String)) - .parent("person") - .build(), - ); + let action = tx.update_schema().add_column( + AddColumn::builder() + .name("name") + .field_type(Type::Primitive(PrimitiveType::String)) + .parent("person") + .build(), + ); let err = match Arc::new(action).commit(&table).await { Err(e) => e, From 4d9fb02ec2ec24b8627eafd43633608ed6ec178a Mon Sep 17 00:00:00 2001 From: Victor Ghita Date: Mon, 4 May 2026 14:36:53 +0300 Subject: [PATCH 13/15] implement suggestions --- Cargo.lock | 808 ++++++++++-------- .../loader/tests/loader_catalog_test.rs | 163 ---- .../loader/tests/schema_update_suite.rs | 289 +++++++ .../iceberg/src/transaction/update_schema.rs | 61 +- 4 files changed, 761 insertions(+), 560 deletions(-) delete mode 100644 crates/catalog/loader/tests/loader_catalog_test.rs create mode 100644 crates/catalog/loader/tests/schema_update_suite.rs diff --git a/Cargo.lock b/Cargo.lock index d659a6a7f8..2ec5afd178 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -14,7 +14,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" dependencies = [ - "crypto-common", + "crypto-common 0.1.7", "generic-array", ] @@ -97,21 +97,6 @@ dependencies = [ "libc", ] -[[package]] -name = "anstream" -version = "0.6.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" -dependencies = [ - "anstyle", - "anstyle-parse 0.2.7", - "anstyle-query", - "anstyle-wincon", - "colorchoice", - "is_terminal_polyfill", - "utf8parse", -] - [[package]] name = "anstream" version = "1.0.0" @@ -119,7 +104,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" dependencies = [ "anstyle", - 
"anstyle-parse 1.0.0", + "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", @@ -133,15 +118,6 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" -[[package]] -name = "anstyle-parse" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" -dependencies = [ - "utf8parse", -] - [[package]] name = "anstyle-parse" version = "1.0.0" @@ -157,7 +133,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -168,7 +144,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -187,7 +163,7 @@ dependencies = [ "bon", "bzip2", "crc32fast", - "digest", + "digest 0.10.7", "liblzma", "log", "miniz_oxide", @@ -235,9 +211,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "58.1.0" +version = "58.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d441fdda254b65f3e9025910eb2c2066b6295d9c8ed409522b8d2ace1ff8574c" +checksum = "607e64bb911ee4f90483e044fe78f175989148c2892e659a2cd25429e782ec54" dependencies = [ "arrow-arith", "arrow-array", @@ -256,9 +232,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "58.1.0" +version = "58.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced5406f8b720cc0bc3aa9cf5758f93e8593cda5490677aa194e4b4b383f9a59" +checksum = "e754319ed8a85d817fe7adf183227e0b5308b82790a737b426c1124626b48118" dependencies = [ "arrow-array", "arrow-buffer", @@ -270,9 +246,9 
@@ dependencies = [ [[package]] name = "arrow-array" -version = "58.1.0" +version = "58.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "772bd34cacdda8baec9418d80d23d0fb4d50ef0735685bd45158b83dfeb6e62d" +checksum = "841321891f247aa86c6112c80d83d89cb36e0addd020fa2425085b8eb6c3f579" dependencies = [ "ahash", "arrow-buffer", @@ -281,7 +257,7 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.16.1", + "hashbrown 0.17.0", "num-complex", "num-integer", "num-traits", @@ -289,9 +265,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "58.1.0" +version = "58.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "898f4cf1e9598fdb77f356fdf2134feedfd0ee8d5a4e0a5f573e7d0aec16baa4" +checksum = "f955dfb73fae000425f49c8226d2044dab60fb7ad4af1e24f961756354d996c9" dependencies = [ "bytes", "half", @@ -301,9 +277,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "58.1.0" +version = "58.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0127816c96533d20fc938729f48c52d3e48f99717e7a0b5ade77d742510736d" +checksum = "ca5e686972523798f76bef355145bc1ae25a84c731e650268d31ab763c701663" dependencies = [ "arrow-array", "arrow-buffer", @@ -323,9 +299,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "58.1.0" +version = "58.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca025bd0f38eeecb57c2153c0123b960494138e6a957bbda10da2b25415209fe" +checksum = "86c276756867fc8186ec380c72c290e6e3b23a1d4fb05df6b1d62d2e62666d48" dependencies = [ "arrow-array", "arrow-cast", @@ -338,9 +314,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "58.1.0" +version = "58.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d10beeab2b1c3bb0b53a00f7c944a178b622173a5c7bcabc3cb45d90238df4" +checksum = "db3b5846209775b6dc8056d77ff9a032b27043383dd5488abd0b663e265b9373" dependencies = [ 
"arrow-buffer", "arrow-schema", @@ -351,9 +327,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "58.1.0" +version = "58.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "609a441080e338147a84e8e6904b6da482cefb957c5cdc0f3398872f69a315d0" +checksum = "fd8907ddd8f9fbabf91ec2c85c1d81fe2874e336d2443eb36373595e28b98dd5" dependencies = [ "arrow-array", "arrow-buffer", @@ -367,18 +343,19 @@ dependencies = [ [[package]] name = "arrow-json" -version = "58.1.0" +version = "58.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ead0914e4861a531be48fe05858265cf854a4880b9ed12618b1d08cba9bebc8" +checksum = "f4518c59acc501f10d7dcae397fe12b8db3d81bc7de94456f8a58f9165d6f502" dependencies = [ "arrow-array", "arrow-buffer", "arrow-cast", - "arrow-data", + "arrow-ord", "arrow-schema", + "arrow-select", "chrono", "half", - "indexmap 2.13.0", + "indexmap 2.14.0", "itoa", "lexical-core", "memchr", @@ -391,9 +368,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "58.1.0" +version = "58.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "763a7ba279b20b52dad300e68cfc37c17efa65e68623169076855b3a9e941ca5" +checksum = "efa70d9d6b1356f1fb9f1f651b84a725b7e0abb93f188cf7d31f14abfa2f2e6f" dependencies = [ "arrow-array", "arrow-buffer", @@ -404,9 +381,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "58.1.0" +version = "58.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14fe367802f16d7668163ff647830258e6e0aeea9a4d79aaedf273af3bdcd3e" +checksum = "faec88a945338192beffbbd4be0def70135422930caa244ac3cec0cd213b26b4" dependencies = [ "arrow-array", "arrow-buffer", @@ -417,9 +394,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "58.1.0" +version = "58.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c30a1365d7a7dc50cc847e54154e6af49e4c4b0fddc9f607b687f29212082743" +checksum = 
"18aa020f6bc8e5201dcd2d4b7f98c68f8a410ef37128263243e6ff2a47a67d4f" dependencies = [ "serde_core", "serde_json", @@ -427,9 +404,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "58.1.0" +version = "58.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78694888660a9e8ac949853db393af2a8b8fc82c19ce333132dfa2e72cc1a7fe" +checksum = "a657ab5132e9c8ca3b24eb15a823d0ced38017fe3930ff50167466b02e2d592c" dependencies = [ "ahash", "arrow-array", @@ -441,9 +418,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "58.1.0" +version = "58.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e04a01f8bb73ce54437514c5fd3ee2aa3e8abe4c777ee5cc55853b1652f79e" +checksum = "f6de2efbbd1a9f9780ceb8d1ff5d20421b35863b361e3386b4f571f1fc69fcb8" dependencies = [ "arrow-array", "arrow-buffer", @@ -486,9 +463,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.41" +version = "0.4.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0f9ee0f6e02ffd7ad5816e9464499fba7b3effd01123b515c41d1697c43dad1" +checksum = "e79b3f8a79cccc2898f31920fc69f304859b3bd567490f75ebf51ae1c792a9ac" dependencies = [ "compression-codecs", "compression-core", @@ -552,9 +529,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.15" +version = "1.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11493b0bad143270fb8ad284a096dd529ba91924c5409adeac856cc1bf047dbc" +checksum = "50f156acdd2cf55f5aa53ee416c4ac851cf1222694506c0b1f78c85695e9ca9d" dependencies = [ "aws-credential-types", "aws-runtime", @@ -594,9 +571,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.16.2" +version = "1.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc" +checksum = 
"0ec6fb3fe69024a75fa7e1bfb48aa6cf59706a101658ea01bfd33b2b248a038f" dependencies = [ "aws-lc-sys", "zeroize", @@ -604,9 +581,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.39.0" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa7e52a4c5c547c741610a2c6f123f3881e409b714cd27e6798ef020c514f0a" +checksum = "f50037ee5e1e41e7b8f9d161680a725bd1626cb6f8c7e901f91f942850852fe7" dependencies = [ "cc", "cmake", @@ -616,9 +593,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.7.2" +version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fc0651c57e384202e47153c1260b84a9936e19803d747615edf199dc3b98d17" +checksum = "5dcd93c82209ac7413532388067dce79be5a8780c1786e5fae3df22e4dee2864" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -641,9 +618,9 @@ dependencies = [ [[package]] name = "aws-sdk-glue" -version = "1.142.0" +version = "1.144.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3962675ec1f2012ae6439814e784557550fa239a4a291bd4f33d8f514d4fdb5b" +checksum = "24165072a1fd89118365d686e569b96b8376a1abfb2f312bd6755fbe3e74b9dc" dependencies = [ "aws-credential-types", "aws-runtime", @@ -665,9 +642,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3tables" -version = "1.54.0" +version = "1.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e0ec266873694efc365debded01f44e27a0de3946a3ac15d24c489759e5ddf8" +checksum = "051311af78d5fe5f4453303b7ef978a07cad2a00590d8d9e12adf8591a847a3b" dependencies = [ "aws-credential-types", "aws-runtime", @@ -689,9 +666,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.97.0" +version = "1.98.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aadc669e184501caaa6beafb28c6267fc1baef0810fb58f9b205485ca3f2567" +checksum = "d69c77aafa20460c68b6b3213c84f6423b6e76dbf89accd3e1789a686ffd9489" dependencies = [ 
"aws-credential-types", "aws-runtime", @@ -713,9 +690,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.99.0" +version = "1.100.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1342a7db8f358d3de0aed2007a0b54e875458e39848d54cc1d46700b2bfcb0a8" +checksum = "1c7e7b09346d5ca22a2a08267555843a6a0127fb20d8964cb6ecfb8fdb190225" dependencies = [ "aws-credential-types", "aws-runtime", @@ -737,9 +714,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.101.0" +version = "1.103.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab41ad64e4051ecabeea802d6a17845a91e83287e1dd249e6963ea1ba78c428a" +checksum = "c2249b81a2e73a8027c41c378463a81ec39b8510f184f2caab87de912af0f49b" dependencies = [ "aws-credential-types", "aws-runtime", @@ -762,9 +739,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.4.2" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0b660013a6683ab23797778e21f1f854744fdf05f68204b4cca4c8c04b5d1f4" +checksum = "68dc0b907359b120170613b5c09ccc61304eac3998ff6274b97d93ee6490115a" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -773,11 +750,11 @@ dependencies = [ "bytes", "form_urlencoded", "hex", - "hmac", + "hmac 0.13.0", "http 0.2.12", "http 1.4.0", "percent-encoding", - "sha2", + "sha2 0.11.0", "time", "tracing", ] @@ -868,9 +845,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.10.3" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "028999056d2d2fd58a697232f9eec4a643cf73a71cf327690a7edad1d2af2110" +checksum = "0504b1ab12debb5959e5165ee5fe97dd387e7aa7ea6a477bfd7635dfe769a4f5" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -893,11 +870,12 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.11.6" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "876ab3c9c29791ba4ba02b780a3049e21ec63dabda09268b175272c3733a79e6" +checksum = "b71a13df6ada0aafbf21a73bdfcdf9324cfa9df77d96b8446045be3cde61b42e" dependencies = [ "aws-smithy-async", + "aws-smithy-runtime-api-macros", "aws-smithy-types", "bytes", "http 0.2.12", @@ -908,6 +886,17 @@ dependencies = [ "zeroize", ] +[[package]] +name = "aws-smithy-runtime-api-macros" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d7396fd9500589e62e460e987ecb671bad374934e55ec3b5f498cc7a8a8a7b7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "aws-smithy-types" version = "1.4.7" @@ -945,9 +934,9 @@ dependencies = [ [[package]] name = "aws-types" -version = "1.3.14" +version = "1.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47c8323699dd9b3c8d5b3c13051ae9cdef58fd179957c882f8374dd8725962d9" +checksum = "2f4bbcaa9304ea40902d3d5f42a0428d1bd895a2b0f6999436fb279ffddc58ac" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1012,9 +1001,9 @@ checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" [[package]] name = "bitflags" -version = "2.11.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" dependencies = [ "serde_core", ] @@ -1025,21 +1014,21 @@ version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" dependencies = [ - "digest", + "digest 0.10.7", ] [[package]] name = "blake3" -version = "1.8.3" +version = "1.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +checksum = 
"0aa83c34e62843d924f905e0f5c866eb1dd6545fc4d719e803d9ba6030371fce" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", - "cpufeatures 0.2.17", + "cpufeatures 0.3.0", ] [[package]] @@ -1051,6 +1040,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-buffer" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" +dependencies = [ + "hybrid-array", +] + [[package]] name = "block-padding" version = "0.3.3" @@ -1086,7 +1084,7 @@ version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" dependencies = [ - "darling 0.20.11", + "darling 0.23.0", "ident_case", "prettyplease", "proc-macro2", @@ -1187,9 +1185,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.57" +version = "1.2.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" +checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d" dependencies = [ "find-msvc-tools", "jobserver", @@ -1217,7 +1215,7 @@ checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" dependencies = [ "cfg-if", "cpufeatures 0.3.0", - "rand_core 0.10.0", + "rand_core 0.10.1", ] [[package]] @@ -1250,15 +1248,15 @@ version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" dependencies = [ - "crypto-common", + "crypto-common 0.1.7", "inout", ] [[package]] name = "clap" -version = "4.6.0" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" dependencies = [ 
"clap_builder", "clap_derive", @@ -1270,7 +1268,7 @@ version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ - "anstream 1.0.0", + "anstream", "anstyle", "clap_lex", "strsim", @@ -1278,9 +1276,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.6.0" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" dependencies = [ "heck", "proc-macro2", @@ -1305,13 +1303,19 @@ dependencies = [ [[package]] name = "cmake" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" dependencies = [ "cc", ] +[[package]] +name = "cmov" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f88a43d011fc4a6876cb7344703e297c71dda42494fee094d5f7c76bf13f746" + [[package]] name = "colorchoice" version = "1.0.5" @@ -1324,7 +1328,7 @@ version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] @@ -1339,9 +1343,9 @@ dependencies = [ [[package]] name = "compression-codecs" -version = "0.4.37" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" +checksum = "ce2548391e9c1929c21bf6aa2680af86fe4c1b33e6cea9ac1cfeec0bd11218cf" dependencies = [ "bzip2", "compression-core", @@ -1354,9 +1358,9 @@ dependencies = [ [[package]] name = "compression-core" 
-version = "0.4.31" +version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" +checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789" [[package]] name = "concurrent-queue" @@ -1385,6 +1389,12 @@ version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + [[package]] name = "const-random" version = "0.1.18" @@ -1456,9 +1466,9 @@ dependencies = [ [[package]] name = "crc-catalog" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +checksum = "217698eaf96b4a3f0bc4f3662aaa55bdf913cd54d7204591faa790070c6d0853" [[package]] name = "crc32c" @@ -1528,6 +1538,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "crypto-common" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77727bb15fa921304124b128af125e7e3b968275d1b108b379190264f4423710" +dependencies = [ + "hybrid-array", +] + [[package]] name = "csv" version = "1.4.0" @@ -1558,6 +1577,15 @@ dependencies = [ "cipher", ] +[[package]] +name = "ctutils" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5515a3834141de9eafb9717ad39eea8247b5674e6066c404e8c4b365d2a29e" +dependencies = [ + "cmov", +] + [[package]] name = "darling" version = "0.20.11" @@ -1787,7 +1815,7 @@ dependencies = [ "half", "hashbrown 0.16.1", "hex", - "indexmap 2.13.0", + "indexmap 2.14.0", "itertools 0.14.0", "libc", "log", @@ -2012,7 +2040,7 @@ dependencies = [ "datafusion-functions-aggregate-common", 
"datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap 2.13.0", + "indexmap 2.14.0", "itertools 0.14.0", "paste", "recursive", @@ -2028,7 +2056,7 @@ checksum = "7d7c3adf3db8bf61e92eb90cb659c8e8b734593a8f7c8e12a843c7ddba24b87e" dependencies = [ "arrow", "datafusion-common", - "indexmap 2.13.0", + "indexmap 2.14.0", "itertools 0.14.0", "paste", ] @@ -2060,7 +2088,7 @@ dependencies = [ "num-traits", "rand 0.9.4", "regex", - "sha2", + "sha2 0.10.9", "unicode-segmentation", "uuid", ] @@ -2192,7 +2220,7 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", - "indexmap 2.13.0", + "indexmap 2.14.0", "itertools 0.14.0", "log", "recursive", @@ -2215,7 +2243,7 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.16.1", - "indexmap 2.13.0", + "indexmap 2.14.0", "itertools 0.14.0", "parking_lot", "paste", @@ -2251,7 +2279,7 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "hashbrown 0.16.1", - "indexmap 2.13.0", + "indexmap 2.14.0", "itertools 0.14.0", "parking_lot", ] @@ -2298,7 +2326,7 @@ dependencies = [ "futures", "half", "hashbrown 0.16.1", - "indexmap 2.13.0", + "indexmap 2.14.0", "itertools 0.14.0", "log", "num-traits", @@ -2361,7 +2389,7 @@ dependencies = [ "rand 0.9.4", "serde_json", "sha1", - "sha2", + "sha2 0.10.9", "url", ] @@ -2377,7 +2405,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-functions-nested", - "indexmap 2.13.0", + "indexmap 2.14.0", "log", "recursive", "regex", @@ -2436,7 +2464,7 @@ version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" dependencies = [ - "const-oid", + "const-oid 0.9.6", "pem-rfc7468", "zeroize", ] @@ -2494,12 +2522,24 @@ version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = 
[ - "block-buffer", - "const-oid", - "crypto-common", + "block-buffer 0.10.4", + "const-oid 0.9.6", + "crypto-common 0.1.7", "subtle", ] +[[package]] +name = "digest" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" +dependencies = [ + "block-buffer 0.12.0", + "const-oid 0.10.2", + "crypto-common 0.2.1", + "ctutils", +] + [[package]] name = "dirs" version = "6.0.0" @@ -2518,7 +2558,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2626,9 +2666,9 @@ dependencies = [ [[package]] name = "env_filter" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a1c3cc8e57274ec99de65301228b537f1e4eedc1b8e0f9411c6caac8ae7308f" +checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef" dependencies = [ "log", "regex", @@ -2636,11 +2676,11 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.9" +version = "0.11.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2daee4ea451f429a58296525ddf28b45a3b64f1acf6587e2067437bb11e218d" +checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a" dependencies = [ - "anstream 0.6.21", + "anstream", "anstyle", "env_filter", "jiff", @@ -2671,7 +2711,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -2742,9 +2782,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.3.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" 
[[package]] name = "faststr" @@ -2842,9 +2882,12 @@ dependencies = [ [[package]] name = "fragile" -version = "2.0.1" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28dd6caf6059519a65843af8fe2a3ae298b14b80179855aeb4adc2c1934ee619" +checksum = "8878864ba14bb86e818a412bfd6f18f9eabd4ec0f008a28e8f7eb61db532fcf9" +dependencies = [ + "futures-core", +] [[package]] name = "fs-err" @@ -3012,7 +3055,7 @@ dependencies = [ "cfg-if", "libc", "r-efi 6.0.0", - "rand_core 0.10.0", + "rand_core 0.10.1", "wasip2", "wasip3", ] @@ -3057,7 +3100,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.4.0", - "indexmap 2.13.0", + "indexmap 2.14.0", "slab", "tokio", "tokio-util", @@ -3111,6 +3154,12 @@ dependencies = [ "foldhash 0.2.0", ] +[[package]] +name = "hashbrown" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" + [[package]] name = "hashlink" version = "0.10.0" @@ -3150,7 +3199,7 @@ version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" dependencies = [ - "hmac", + "hmac 0.12.1", ] [[package]] @@ -3159,7 +3208,16 @@ version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" dependencies = [ - "digest", + "digest 0.10.7", +] + +[[package]] +name = "hmac" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6303bc9732ae41b04cb554b844a762b4115a61bfaa81e3e83050991eeb56863f" +dependencies = [ + "digest 0.11.3", ] [[package]] @@ -3244,11 +3302,20 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" +[[package]] +name = "hybrid-array" +version = "0.4.11" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d46837a0ed51fe95bd3b05de33cd64a1ee88fc797477ca48446872504507c5" +dependencies = [ + "typenum", +] + [[package]] name = "hyper" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" dependencies = [ "atomic-waker", "bytes", @@ -3261,7 +3328,6 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "pin-utils", "smallvec", "tokio", "want", @@ -3269,20 +3335,19 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.7" +version = "0.27.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" dependencies = [ "http 1.4.0", "hyper", "hyper-util", "rustls", "rustls-native-certs", - "rustls-pki-types", "tokio", "tokio-rustls", "tower-service", - "webpki-roots 1.0.6", + "webpki-roots 1.0.7", ] [[package]] @@ -3302,7 +3367,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.3", "tokio", "tower-service", "tracing", @@ -3632,12 +3697,13 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" dependencies = [ "displaydoc", "potential_utf", + "utf8_iter", "yoke", "zerofrom", "zerovec", @@ -3645,9 +3711,9 @@ dependencies = [ [[package]] name = "icu_locale_core" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" dependencies = [ "displaydoc", "litemap", @@ -3658,9 +3724,9 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" dependencies = [ "icu_collections", "icu_normalizer_data", @@ -3672,15 +3738,15 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" [[package]] name = "icu_properties" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" dependencies = [ "icu_collections", "icu_locale_core", @@ -3692,15 +3758,15 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" [[package]] name = "icu_provider" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" dependencies = [ "displaydoc", "icu_locale_core", @@ -3736,9 +3802,9 @@ 
dependencies = [ [[package]] name = "idna_adapter" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" dependencies = [ "icu_normalizer", "icu_properties", @@ -3757,12 +3823,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.13.0" +version = "2.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", - "hashbrown 0.16.1", + "hashbrown 0.17.0", "serde", "serde_core", ] @@ -3808,9 +3874,9 @@ dependencies = [ [[package]] name = "inventory" -version = "0.3.22" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "009ae045c87e7082cb72dab0ccd01ae075dd00141ddc108f43a0ea150a9e7227" +checksum = "a4f0c30c76f2f4ccee3fe55a2435f691ca00c0e4bd87abe4f4a851b1d4dac39b" dependencies = [ "rustversion", ] @@ -3823,9 +3889,9 @@ checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" -version = "0.7.11" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8e7418f59cc01c88316161279a7f665217ae316b388e58a0d10e29f54f1e5eb" +checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20" dependencies = [ "memchr", "serde", @@ -3863,9 +3929,9 @@ checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "jiff" -version = "0.2.23" +version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" +checksum = "f00b5dbd620d61dfdcb6007c9c1f6054ebd75319f163d886a9055cec1155073d" dependencies 
= [ "jiff-static", "jiff-tzdb-platform", @@ -3873,14 +3939,14 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] name = "jiff-static" -version = "0.2.23" +version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" +checksum = "e000de030ff8022ea1da3f466fbb0f3a809f5e51ed31f6dd931c35181ad8e6d7" dependencies = [ "proc-macro2", "quote", @@ -3914,10 +3980,12 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.91" +version = "0.3.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +checksum = "a1840c94c045fbcf8ba2812c95db44499f7c64910a912551aaaa541decebcacf" dependencies = [ + "cfg-if", + "futures-util", "once_cell", "wasm-bindgen", ] @@ -4011,15 +4079,15 @@ dependencies = [ [[package]] name = "libbz2-rs-sys" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" +checksum = "b3a6a8c165077efc8f3a971534c50ea6a1a18b329ef4a66e897a7e3a1494565f" [[package]] name = "libc" -version = "0.2.183" +version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" [[package]] name = "liblzma" @@ -4032,9 +4100,9 @@ dependencies = [ [[package]] name = "liblzma-sys" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f2db66f3268487b5033077f266da6777d057949b8f93c8ad82e441df25e6186" +checksum = "1a60851d15cd8c5346eca4ab8babff585be2ae4bc8097c067291d3ffe2add3b6" dependencies = [ "cc", "libc", @@ -4049,24 +4117,23 @@ checksum = 
"b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libmimalloc-sys" -version = "0.1.44" +version = "0.1.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "667f4fec20f29dfc6bc7357c582d91796c169ad7e2fce709468aefeb2c099870" +checksum = "2d1eacfa31c33ec25e873c136ba5669f00f9866d0688bea7be4d3f7e43067df6" dependencies = [ "cc", - "libc", ] [[package]] name = "libredox" -version = "0.1.14" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" +checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c" dependencies = [ "bitflags", "libc", "plain", - "redox_syscall 0.7.3", + "redox_syscall 0.7.4", ] [[package]] @@ -4086,7 +4153,7 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14e6ba06f0ade6e504aff834d7c34298e5155c6baca353cc6a4aaff2f9fd7f33" dependencies = [ - "anstream 1.0.0", + "anstream", "anstyle", "clap", "escape8259", @@ -4117,9 +4184,9 @@ checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" [[package]] name = "lock_api" @@ -4173,7 +4240,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" dependencies = [ "cfg-if", - "digest", + "digest 0.10.7", ] [[package]] @@ -4212,9 +4279,9 @@ dependencies = [ [[package]] name = "mimalloc" -version = "0.1.48" +version = "0.1.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1ee66a4b64c74f4ef288bcbb9192ad9c3feaad75193129ac8509af543894fd8" +checksum = 
"b3627c4272df786b9260cabaa46aec1d59c93ede723d4c3ef646c503816b0640" dependencies = [ "libmimalloc-sys", ] @@ -4422,7 +4489,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4447,7 +4514,7 @@ dependencies = [ "num-integer", "num-iter", "num-traits", - "rand 0.8.5", + "rand 0.8.6", "smallvec", "zeroize", ] @@ -4463,9 +4530,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" [[package]] name = "num-integer" @@ -4642,7 +4709,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" dependencies = [ "num-traits", - "rand 0.8.5", + "rand 0.8.6", "serde", ] @@ -4699,9 +4766,9 @@ dependencies = [ [[package]] name = "parquet" -version = "58.1.0" +version = "58.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d3f9f2205199603564127932b89695f52b62322f541d0fc7179d57c2e1c9877" +checksum = "43d7efd3052f7d6ef601085559a246bc991e9a8cc77e02753737df6322ce35f1" dependencies = [ "ahash", "arrow-array", @@ -4717,7 +4784,7 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.16.1", + "hashbrown 0.17.0", "lz4_flex", "num-bigint", "num-integer", @@ -4783,8 +4850,8 @@ version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" dependencies = [ - "digest", - "hmac", + "digest 0.10.7", + "hmac 0.12.1", ] [[package]] @@ -4820,7 +4887,7 @@ checksum = 
"8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", "hashbrown 0.15.5", - "indexmap 2.13.0", + "indexmap 2.14.0", "serde", ] @@ -4918,7 +4985,7 @@ dependencies = [ "der", "pbkdf2", "scrypt", - "sha2", + "sha2 0.10.9", "spki", ] @@ -4936,9 +5003,9 @@ dependencies = [ [[package]] name = "pkg-config" -version = "0.3.32" +version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" [[package]] name = "plain" @@ -4966,18 +5033,18 @@ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" +checksum = "c2a106d1259c23fac8e543272398ae0e3c0b8d33c88ed73d0cc71b0f1d902618" dependencies = [ "portable-atomic", ] [[package]] name = "potential_utf" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" dependencies = [ "zerovec", ] @@ -5049,7 +5116,7 @@ version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" dependencies = [ - "toml_edit 0.25.5+spec-1.1.0", + "toml_edit 0.25.11+spec-1.1.0", ] [[package]] @@ -5078,7 +5145,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", - "itertools 0.13.0", + "itertools 0.14.0", "log", "multimap", "petgraph", @@ -5097,7 +5164,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn", @@ -5114,9 +5181,9 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3852766467df634d74f0b2d7819bf8dc483a0eb2e3b0f50f756f9cfe8b0d18d8" +checksum = "645dbe486e346d9b5de3ef16ede18c26e6c70ad97418f4874b8b1889d6e761ea" dependencies = [ "ar_archive_writer", "cc", @@ -5191,7 +5258,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2 0.5.10", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tracing", @@ -5228,7 +5295,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.3", "tracing", "windows-sys 0.60.2", ] @@ -5275,9 +5342,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" dependencies = [ "libc", "rand_chacha 0.3.1", @@ -5303,7 +5370,7 @@ checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" dependencies = [ "chacha20", "getrandom 0.4.2", - "rand_core 0.10.0", + "rand_core 0.10.1", ] [[package]] @@ -5347,9 +5414,9 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba" +checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" [[package]] name = "recursive" @@ -5382,9 +5449,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.7.3" +version = "0.7.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16" +checksum = "f450ad9c3b1da563fb6948a8e0fb0fb9269711c9c73d9ea1de5058c79c8d643a" dependencies = [ "bitflags", ] @@ -5490,7 +5557,7 @@ dependencies = [ "form_urlencoded", "getrandom 0.2.17", "hex", - "hmac", + "hmac 0.12.1", "home", "http 1.4.0", "jsonwebtoken", @@ -5498,14 +5565,14 @@ dependencies = [ "once_cell", "percent-encoding", "quick-xml 0.37.5", - "rand 0.8.5", + "rand 0.8.6", "reqwest", "rsa", "rust-ini", "serde", "serde_json", "sha1", - "sha2", + "sha2 0.10.9", "tokio", ] @@ -5549,7 +5616,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots 1.0.6", + "webpki-roots 1.0.7", ] [[package]] @@ -5568,13 +5635,13 @@ dependencies = [ [[package]] name = "rkyv" -version = "0.8.15" +version = "0.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a30e631b7f4a03dee9056b8ef6982e8ba371dd5bedb74d3ec86df4499132c70" +checksum = "73389e0c99e664f919275ab5b5b0471391fe9a8de61e1dff9b1eaf56a90f16e3" dependencies = [ "bytes", - "hashbrown 0.16.1", - "indexmap 2.13.0", + "hashbrown 0.17.0", + "indexmap 2.14.0", "munge", "ptr_meta", "rancor", @@ -5586,9 +5653,9 @@ dependencies = [ [[package]] name = "rkyv_derive" -version = "0.8.15" +version = "0.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8100bb34c0a1d0f907143db3149e6b4eea3c33b9ee8b189720168e818303986f" +checksum = "5d2ed0b54125315fb36bd021e82d314d1c126548f871634b483f46b31d13cac6" dependencies = [ "proc-macro2", "quote", @@ -5597,9 +5664,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.11.3" +version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ba9ce64a8f45d7fc86358410bb1a82e8c987504c0d4900e9141d69a9f26c885" +checksum = "1dedc5658c6ecb3bdb5ef5f3295bb9253f42dcf3fd1402c03f6b1f7659c3c4a9" dependencies = [ "bytemuck", "byteorder", @@ 
-5611,15 +5678,15 @@ version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" dependencies = [ - "const-oid", - "digest", + "const-oid 0.9.6", + "digest 0.10.7", "num-bigint-dig", "num-integer", "num-traits", "pkcs1", "pkcs8", "rand_core 0.6.4", - "sha2", + "sha2 0.10.9", "signature", "spki", "subtle", @@ -5667,11 +5734,11 @@ dependencies = [ [[package]] name = "rustc-hash" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" dependencies = [ - "rand 0.8.5", + "rand 0.8.6", ] [[package]] @@ -5693,14 +5760,14 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] name = "rustls" -version = "0.23.37" +version = "0.23.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" +checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" dependencies = [ "aws-lc-rs", "once_cell", @@ -5725,9 +5792,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.14.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" dependencies = [ "web-time", "zeroize", @@ -5868,7 +5935,7 @@ checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" dependencies = [ "pbkdf2", "salsa20", - "sha2", + "sha2 0.10.9", ] [[package]] @@ -5896,9 +5963,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" dependencies = [ "serde", "serde_core", @@ -5931,9 +5998,9 @@ dependencies = [ [[package]] name = "serde_arrow" -version = "0.14.0" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2784e59a0315568e850cb01ddadf458f8c09e28d8cfc4880c2cc08f5dc3444e0" +checksum = "26e4ac1bef72720318e2c67bd19b972d17084840f3188a585021828122c43c2c" dependencies = [ "arrow-array", "arrow-schema", @@ -6044,15 +6111,15 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.18.0" +version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd5414fad8e6907dbdd5bc441a50ae8d6e26151a03b1de04d89a5576de61d01f" +checksum = "f05839ce67618e14a09b286535c0d9c94e85ef25469b0e13cb4f844e5593eb19" dependencies = [ "base64", "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.13.0", + "indexmap 2.14.0", "schemars 0.9.0", "schemars 1.2.1", "serde_core", @@ -6063,9 +6130,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.18.0" +version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3db8978e608f1fe7357e211969fd9abdcae80bac1ba7a3369bb7eb6b404eb65" +checksum = "cf2ebbe86054f9b45bc3881e865683ccfaccce97b9b4cb53f3039d67f355a334" dependencies = [ "darling 0.23.0", "proc-macro2", @@ -6079,7 +6146,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.13.0", + "indexmap 2.14.0", "itoa", "ryu", "serde", @@ -6094,7 +6161,7 @@ checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ "cfg-if", "cpufeatures 0.2.17", - "digest", + "digest 0.10.7", ] [[package]] @@ -6105,7 +6172,18 @@ checksum = 
"a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", "cpufeatures 0.2.17", - "digest", + "digest 0.10.7", +] + +[[package]] +name = "sha2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.3", ] [[package]] @@ -6139,15 +6217,15 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" dependencies = [ - "digest", + "digest 0.10.7", "rand_core 0.6.4", ] [[package]] name = "simd-adler32" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" [[package]] name = "simdutf8" @@ -6175,9 +6253,9 @@ dependencies = [ [[package]] name = "siphasher" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" +checksum = "8ee5873ec9cce0195efcb7a4e9507a04cd49aec9c83d0389df45b1ef7ba2e649" [[package]] name = "slab" @@ -6217,14 +6295,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "sonic-number" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5661364b38abad49cf1ade6631fcc35d2ccf882a7d68616b4228b7717feb5fba" +checksum = "3775c3390edf958191f1ab1e8c5c188907feebd0f3ce1604cb621f72961dbf32" dependencies = [ "cfg-if", ] @@ -6251,9 +6329,9 @@ dependencies = [ [[package]] name = 
"sonic-simd" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9f944718c33623919878cf74b4c9361eb3024f635733922b26722b14cd3f8cc" +checksum = "f99e664ecd2d85a68c87e3c7a3cfe691f647ea9e835de984aba4d54a41f817d4" dependencies = [ "cfg-if", ] @@ -6293,7 +6371,7 @@ dependencies = [ "libtest-mimic", "md-5", "owo-colors", - "rand 0.8.5", + "rand 0.8.6", "regex", "similar", "subst", @@ -6355,7 +6433,7 @@ dependencies = [ "futures-util", "hashbrown 0.15.5", "hashlink", - "indexmap 2.13.0", + "indexmap 2.14.0", "log", "memchr", "once_cell", @@ -6363,7 +6441,7 @@ dependencies = [ "rustls", "serde", "serde_json", - "sha2", + "sha2 0.10.9", "smallvec", "thiserror 2.0.18", "tokio", @@ -6401,7 +6479,7 @@ dependencies = [ "quote", "serde", "serde_json", - "sha2", + "sha2 0.10.9", "sqlx-core", "sqlx-sqlite", "syn", @@ -6421,7 +6499,7 @@ dependencies = [ "byteorder", "bytes", "crc", - "digest", + "digest 0.10.7", "dotenvy", "either", "futures-channel", @@ -6431,17 +6509,17 @@ dependencies = [ "generic-array", "hex", "hkdf", - "hmac", + "hmac 0.12.1", "itoa", "log", "md-5", "memchr", "once_cell", "percent-encoding", - "rand 0.8.5", + "rand 0.8.6", "rsa", "sha1", - "sha2", + "sha2 0.10.9", "smallvec", "sqlx-core", "stringprep", @@ -6468,17 +6546,17 @@ dependencies = [ "futures-util", "hex", "hkdf", - "hmac", + "hmac 0.12.1", "home", "itoa", "log", "md-5", "memchr", "once_cell", - "rand 0.8.5", + "rand 0.8.6", "serde", "serde_json", - "sha2", + "sha2 0.10.9", "smallvec", "sqlx-core", "stringprep", @@ -6519,15 +6597,15 @@ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "stacker" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d74a23609d509411d10e2176dc2a4346e3b4aea2e7b1869f19fdedbc71c013" +checksum = "640c8cdd92b6b12f5bcb1803ca3bbf5ab96e5e6b6b96b9ab77dabe9e880b3190" dependencies = [ "cc", "cfg-if", 
"libc", "psm", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -6656,7 +6734,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -6767,9 +6845,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" dependencies = [ "displaydoc", "zerovec", @@ -6876,9 +6954,9 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "1.0.1+spec-1.1.0" +version = "1.1.1+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b320e741db58cac564e26c607d3cc1fdc4a88fd36c879568c07856ed83ff3e9" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" dependencies = [ "serde_core", ] @@ -6889,7 +6967,7 @@ version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ - "indexmap 2.13.0", + "indexmap 2.14.0", "serde", "serde_spanned", "toml_datetime 0.6.11", @@ -6899,23 +6977,23 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.25.5+spec-1.1.0" +version = "0.25.11+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ca1a40644a28bce036923f6a431df0b34236949d111cc07cb6dca830c9ef2e1" +checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" dependencies = [ - "indexmap 2.13.0", - "toml_datetime 1.0.1+spec-1.1.0", + "indexmap 2.14.0", + "toml_datetime 1.1.1+spec-1.1.0", "toml_parser", - "winnow 1.0.0", + "winnow 1.0.2", ] [[package]] name = "toml_parser" -version = "1.0.10+spec-1.1.0" +version = "1.1.2+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7df25b4befd31c4816df190124375d5a20c6b6921e2cad937316de3fccd63420" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" dependencies = [ - "winnow 1.0.0", + "winnow 1.0.2", ] [[package]] @@ -7067,9 +7145,9 @@ checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" [[package]] name = "typenum" -version = "1.19.0" +version = "1.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" [[package]] name = "typetag" @@ -7171,9 +7249,9 @@ checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" [[package]] name = "unicode-width" @@ -7205,7 +7283,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" dependencies = [ - "crypto-common", + "crypto-common 0.1.7", "subtle", ] @@ -7253,9 +7331,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.23.0" +version = "1.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9" +checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -7370,11 +7448,11 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.2+wasi-0.2.9" +version = "1.0.3+wasi-0.2.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.57.1", ] [[package]] @@ -7383,7 +7461,7 @@ version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.51.0", ] [[package]] @@ -7394,9 +7472,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.114" +version = "0.2.120" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +checksum = "df52b6d9b87e0c74c9edfa1eb2d9bf85e5d63515474513aa50fa181b3c4f5db1" dependencies = [ "cfg-if", "once_cell", @@ -7407,23 +7485,19 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.64" +version = "0.4.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" +checksum = "af934872acec734c2d80e6617bbb5ff4f12b052dd8e6332b0817bce889516084" dependencies = [ - "cfg-if", - "futures-util", "js-sys", - "once_cell", "wasm-bindgen", - "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.114" +version = "0.2.120" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +checksum = "78b1041f495fb322e64aca85f5756b2172e35cd459376e67f2a6c9dffcedb103" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -7431,9 +7505,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.114" +version = "0.2.120" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +checksum = "9dcd0ff20416988a18ac686d4d4d0f6aae9ebf08a389ff5d29012b05af2a1b41" dependencies = [ "bumpalo", "proc-macro2", @@ -7444,9 +7518,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.114" +version = "0.2.120" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +checksum = "49757b3c82ebf16c57d69365a142940b384176c24df52a087fb748e2085359ea" dependencies = [ "unicode-ident", ] @@ -7468,7 +7542,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" dependencies = [ "anyhow", - "indexmap 2.13.0", + "indexmap 2.14.0", "wasm-encoder", "wasmparser", ] @@ -7494,15 +7568,15 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ "bitflags", "hashbrown 0.15.5", - "indexmap 2.13.0", + "indexmap 2.14.0", "semver", ] [[package]] name = "web-sys" -version = "0.3.91" +version = "0.3.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" +checksum = "2eadbac71025cd7b0834f20d1fe8472e8495821b4e9801eb0a60bd1f19827602" dependencies = [ "js-sys", "wasm-bindgen", @@ -7524,14 +7598,14 @@ version = "0.26.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" dependencies = [ - "webpki-roots 1.0.6", + "webpki-roots 1.0.7", ] [[package]] name = "webpki-roots" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" +checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" 
dependencies = [ "rustls-pki-types", ] @@ -7552,7 +7626,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] @@ -7856,9 +7930,9 @@ dependencies = [ [[package]] name = "winnow" -version = "1.0.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a90e88e4667264a994d34e6d1ab2d26d398dcdca8b7f52bec8668957517fc7d8" +checksum = "2ee1708bef14716a11bae175f579062d4554d95be2c6829f518df847b7b3fdd0" dependencies = [ "memchr", ] @@ -7872,6 +7946,12 @@ dependencies = [ "wit-bindgen-rust-macro", ] +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + [[package]] name = "wit-bindgen-core" version = "0.51.0" @@ -7891,7 +7971,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", "heck", - "indexmap 2.13.0", + "indexmap 2.14.0", "prettyplease", "syn", "wasm-metadata", @@ -7922,7 +8002,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", "bitflags", - "indexmap 2.13.0", + "indexmap 2.14.0", "log", "serde", "serde_derive", @@ -7941,7 +8021,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" dependencies = [ "anyhow", "id-arena", - "indexmap 2.13.0", + "indexmap 2.14.0", "log", "semver", "serde", @@ -7953,9 +8033,9 @@ dependencies = [ [[package]] name = "writeable" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" [[package]] name = "xmlparser" @@ -7971,9 
+8051,9 @@ checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] name = "yoke" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" dependencies = [ "stable_deref_trait", "yoke-derive", @@ -7982,9 +8062,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" dependencies = [ "proc-macro2", "quote", @@ -7994,18 +8074,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.47" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efbb2a062be311f2ba113ce66f697a4dc589f85e78a4aea276200804cea0ed87" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.47" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", @@ -8014,18 +8094,18 @@ dependencies = [ [[package]] name = "zerofrom" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.6" +version = "0.1.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" dependencies = [ "proc-macro2", "quote", @@ -8041,9 +8121,9 @@ checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" [[package]] name = "zerotrie" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" dependencies = [ "displaydoc", "yoke", @@ -8052,9 +8132,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" dependencies = [ "yoke", "zerofrom", @@ -8063,9 +8143,9 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" dependencies = [ "proc-macro2", "quote", diff --git a/crates/catalog/loader/tests/loader_catalog_test.rs b/crates/catalog/loader/tests/loader_catalog_test.rs deleted file mode 100644 index 2dcaa4bd9e..0000000000 --- a/crates/catalog/loader/tests/loader_catalog_test.rs +++ /dev/null @@ -1,163 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Catalog tests for schema evolution with `MemoryCatalog`. - -use std::collections::HashMap; - -use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder}; -use iceberg::spec::{NestedField, PrimitiveType, Schema, Type}; -use iceberg::transaction::{AddColumn, ApplyTransactionAction, Transaction}; -use iceberg::{Catalog, CatalogBuilder, ErrorKind, NamespaceIdent, TableCreation, TableIdent}; -use tempfile::TempDir; - -fn base_schema() -> Schema { - Schema::builder() - .with_fields(vec![ - NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(), - NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(), - NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(), - ]) - .with_identifier_field_ids(vec![2]) - .build() - .unwrap() -} - -async fn new_catalog() -> (iceberg::MemoryCatalog, TempDir) { - let warehouse = TempDir::new().unwrap(); - let catalog = MemoryCatalogBuilder::default() - .load( - "memory", - HashMap::from([( - MEMORY_CATALOG_WAREHOUSE.to_string(), - warehouse.path().to_string_lossy().to_string(), - )]), - ) - .await - .unwrap(); - - (catalog, warehouse) -} - -async fn create_table(catalog: &iceberg::MemoryCatalog, table_name: &str) -> TableIdent { - let ns = NamespaceIdent::new("schema_evolution".to_string()); - if catalog.get_namespace(&ns).await.is_err() { - catalog.create_namespace(&ns, 
HashMap::new()).await.unwrap(); - } - - let table_ident = TableIdent::new(ns.clone(), table_name.to_string()); - let _ = catalog.drop_table(&table_ident).await; - - catalog - .create_table( - &ns, - TableCreation::builder() - .name(table_name.to_string()) - .schema(base_schema()) - .build(), - ) - .await - .unwrap(); - - table_ident -} - -#[tokio::test] -async fn test_add_field_with_memory_catalog() { - let (catalog, _warehouse) = new_catalog().await; - let table_ident = create_table(&catalog, "t_add_field").await; - let table = catalog.load_table(&table_ident).await.unwrap(); - - let tx = Transaction::new(&table); - let tx = tx - .update_schema() - .add_column(AddColumn::optional( - "a", - Type::Primitive(PrimitiveType::Int), - )) - .apply(tx) - .unwrap(); - - let updated_table = tx.commit(&catalog).await.unwrap(); - let schema = updated_table.metadata().current_schema(); - - let field_a = schema.field_by_name("a").expect("a should exist"); - assert_eq!(field_a.id, 4); - assert_eq!(*field_a.field_type, Type::Primitive(PrimitiveType::Int)); -} - -#[tokio::test] -async fn test_add_nested_and_delete_field_with_memory_catalog() { - let (catalog, _warehouse) = new_catalog().await; - let table_ident = create_table(&catalog, "t_add_nested_delete").await; - let table = catalog.load_table(&table_ident).await.unwrap(); - - let tx = Transaction::new(&table); - let tx = tx - .update_schema() - .add_column(AddColumn::optional( - "info", - Type::Struct(iceberg::spec::StructType::new(vec![ - NestedField::optional(0, "city", Type::Primitive(PrimitiveType::String)).into(), - ])), - )) - .apply(tx) - .unwrap(); - let table = tx.commit(&catalog).await.unwrap(); - - let tx = Transaction::new(&table); - let tx = tx - .update_schema() - .add_column( - AddColumn::builder() - .name("zip") - .field_type(Type::Primitive(PrimitiveType::String)) - .parent("info") - .build(), - ) - .delete_column("baz") - .apply(tx) - .unwrap(); - let table = tx.commit(&catalog).await.unwrap(); - - let schema 
= table.metadata().current_schema(); - assert!(schema.field_by_name("info").is_some()); - assert!(schema.field_by_name("info.city").is_some()); - assert!(schema.field_by_name("info.zip").is_some()); - assert!(schema.field_by_name("baz").is_none()); -} - -#[tokio::test] -async fn test_delete_identifier_and_missing_field_fail_with_memory_catalog() { - let (catalog, _warehouse) = new_catalog().await; - let table_ident = create_table(&catalog, "t_delete_failures").await; - let table = catalog.load_table(&table_ident).await.unwrap(); - - let tx = Transaction::new(&table); - let tx = tx.update_schema().delete_column("bar").apply(tx).unwrap(); - let err = tx.commit(&catalog).await.unwrap_err(); - assert_eq!(err.kind(), ErrorKind::PreconditionFailed); - - let tx = Transaction::new(&table); - let tx = tx - .update_schema() - .delete_column("nonexistent") - .apply(tx) - .unwrap(); - let err = tx.commit(&catalog).await.unwrap_err(); - assert_eq!(err.kind(), ErrorKind::PreconditionFailed); -} diff --git a/crates/catalog/loader/tests/schema_update_suite.rs b/crates/catalog/loader/tests/schema_update_suite.rs new file mode 100644 index 0000000000..7338967c76 --- /dev/null +++ b/crates/catalog/loader/tests/schema_update_suite.rs @@ -0,0 +1,289 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Common schema-update behavior across catalogs. +//! +//! These tests assume Docker containers are started externally via `make docker-up`. + +mod common; + +use std::collections::HashMap; + +use common::{CatalogKind, cleanup_namespace_dyn, load_catalog}; +use iceberg::spec::{NestedField, PrimitiveType, Schema, StructType, Type}; +use iceberg::transaction::{AddColumn, ApplyTransactionAction, Transaction}; +use iceberg::{ErrorKind, NamespaceIdent, Result, TableCreation, TableIdent}; +use iceberg_test_utils::normalize_test_name_with_parts; +use rstest::rstest; + +fn base_schema() -> Schema { + Schema::builder() + .with_fields(vec![ + NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(), + NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(), + ]) + .with_identifier_field_ids(vec![2]) + .build() + .unwrap() +} + +// Common behavior: adding a top-level column appends it to the schema. +// HMS is excluded because update_table is not yet supported. 
+#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_schema_add_column(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_schema_add_column", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let table_name = + normalize_test_name_with_parts!("catalog_schema_add_column", harness.label, "table"); + let table = catalog + .create_table( + &namespace, + TableCreation::builder() + .name(table_name) + .schema(base_schema()) + .build(), + ) + .await?; + + let tx = Transaction::new(&table); + let tx = tx + .update_schema() + .add_column(AddColumn::optional( + "a", + Type::Primitive(PrimitiveType::Int), + )) + .apply(tx)?; + let updated = tx.commit(catalog.as_ref()).await?; + + let schema = updated.metadata().current_schema(); + let field_a = schema.field_by_name("a").expect("field 'a' should exist"); + assert_eq!(field_a.id, 4); + assert_eq!(*field_a.field_type, Type::Primitive(PrimitiveType::Int)); + + Ok(()) +} + +// Common behavior: adding a nested struct column then a sub-field within it, +// and deleting another top-level column, all persist correctly. +// HMS is excluded because update_table is not yet supported. 
+#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_schema_add_nested_and_delete_column(#[case] kind: CatalogKind) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_schema_add_nested_and_delete_column", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let table_name = normalize_test_name_with_parts!( + "catalog_schema_add_nested_and_delete_column", + harness.label, + "table" + ); + let table = catalog + .create_table( + &namespace, + TableCreation::builder() + .name(table_name) + .schema(base_schema()) + .build(), + ) + .await?; + + // First transaction: add a nested struct column. + let tx = Transaction::new(&table); + let tx = tx + .update_schema() + .add_column(AddColumn::optional( + "info", + Type::Struct(StructType::new(vec![ + NestedField::optional(0, "city", Type::Primitive(PrimitiveType::String)).into(), + ])), + )) + .apply(tx)?; + let table = tx.commit(catalog.as_ref()).await?; + + // Second transaction: add a sub-field to the nested struct and delete a top-level column. 
+ let tx = Transaction::new(&table); + let tx = tx + .update_schema() + .add_column( + AddColumn::builder() + .name("zip") + .field_type(Type::Primitive(PrimitiveType::String)) + .parent("info") + .build(), + ) + .delete_column("baz") + .apply(tx)?; + let table = tx.commit(catalog.as_ref()).await?; + + let schema = table.metadata().current_schema(); + assert!(schema.field_by_name("info").is_some()); + assert!(schema.field_by_name("info.city").is_some()); + assert!(schema.field_by_name("info.zip").is_some()); + assert!(schema.field_by_name("baz").is_none()); + + Ok(()) +} + +// Common behavior: deleting an identifier field or a nonexistent field must fail. +// HMS is excluded because update_table is not yet supported. +#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_schema_delete_invalid_column_errors( + #[case] kind: CatalogKind, +) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_schema_delete_invalid_column_errors", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let table_name = normalize_test_name_with_parts!( + "catalog_schema_delete_invalid_column_errors", + harness.label, + "table" + ); + let table = catalog + .create_table( + &namespace, + TableCreation::builder() + .name(table_name) + .schema(base_schema()) + .build(), + ) + .await?; + + // Deleting an identifier field must fail. 
+ let tx = Transaction::new(&table); + let tx = tx.update_schema().delete_column("bar").apply(tx)?; + let err = tx.commit(catalog.as_ref()).await.unwrap_err(); + assert_eq!(err.kind(), ErrorKind::PreconditionFailed); + + // Deleting a nonexistent field must fail. + let tx = Transaction::new(&table); + let tx = tx + .update_schema() + .delete_column("nonexistent") + .apply(tx)?; + let err = tx.commit(catalog.as_ref()).await.unwrap_err(); + assert_eq!(err.kind(), ErrorKind::PreconditionFailed); + + Ok(()) +} + +// Common behavior: loading a table after a schema update reflects the new schema. +// HMS is excluded because update_table is not yet supported. +#[rstest] +#[case::rest_catalog(CatalogKind::Rest)] +#[case::glue_catalog(CatalogKind::Glue)] +#[case::sql_catalog(CatalogKind::Sql)] +#[case::s3tables_catalog(CatalogKind::S3Tables)] +#[case::memory_catalog(CatalogKind::Memory)] +#[tokio::test] +async fn test_catalog_schema_update_persisted_after_reload( + #[case] kind: CatalogKind, +) -> Result<()> { + let Some(harness) = load_catalog(kind).await else { + return Ok(()); + }; + let catalog = harness.catalog; + let namespace = NamespaceIdent::new(normalize_test_name_with_parts!( + "catalog_schema_update_persisted_after_reload", + harness.label + )); + + cleanup_namespace_dyn(catalog.as_ref(), &namespace).await; + catalog.create_namespace(&namespace, HashMap::new()).await?; + + let table_name = normalize_test_name_with_parts!( + "catalog_schema_update_persisted_after_reload", + harness.label, + "table" + ); + let table_ident = TableIdent::new(namespace.clone(), table_name.clone()); + let table = catalog + .create_table( + &namespace, + TableCreation::builder() + .name(table_name) + .schema(base_schema()) + .build(), + ) + .await?; + + let tx = Transaction::new(&table); + let tx = tx + .update_schema() + .add_column(AddColumn::optional( + "new_field", + Type::Primitive(PrimitiveType::Long), + )) + .apply(tx)?; + tx.commit(catalog.as_ref()).await?; + + let reloaded = 
catalog.load_table(&table_ident).await?; + assert!( + reloaded + .metadata() + .current_schema() + .field_by_name("new_field") + .is_some() + ); + + Ok(()) +} diff --git a/crates/iceberg/src/transaction/update_schema.rs b/crates/iceberg/src/transaction/update_schema.rs index 7f81b43de8..a16e10e493 100644 --- a/crates/iceberg/src/transaction/update_schema.rs +++ b/crates/iceberg/src/transaction/update_schema.rs @@ -32,7 +32,7 @@ use crate::{Error, ErrorKind, Result, TableRequirement, TableUpdate}; /// Sentinel parent ID representing the table root (top-level columns). const TABLE_ROOT_ID: i32 = -1; // Default ID for a new column. This will be re-assigned to a fresh ID at commit time. -const DEFAULT_ID: i32 = 0; +const DEFAULT_FIELD_ID: i32 = 0; /// Declarative specification for adding a column in [`UpdateSchemaAction`]. /// @@ -80,7 +80,7 @@ impl AddColumn { fn to_nested_field(&self) -> NestedFieldRef { let mut field = NestedField::new( - DEFAULT_ID, + DEFAULT_FIELD_ID, self.name.clone(), self.field_type.clone(), self.required, @@ -118,7 +118,6 @@ impl AddColumn { pub struct UpdateSchemaAction { additions: Vec, deletes: Vec, - auto_assign_ids: bool, } impl UpdateSchemaAction { @@ -127,7 +126,6 @@ impl UpdateSchemaAction { Self { additions: Vec::new(), deletes: Vec::new(), - auto_assign_ids: true, } } @@ -152,13 +150,6 @@ impl UpdateSchemaAction { self.deletes.push(name.to_string()); self } - - /// Disable automatic field ID assignment. When disabled, the placeholder IDs - /// provided in builder methods are used as-is. - pub fn disable_id_auto_assignment(mut self) -> Self { - self.auto_assign_ids = false; - self - } } // --------------------------------------------------------------------------- @@ -171,7 +162,7 @@ impl UpdateSchemaAction { /// from `crate::spec::schema::id_reassigner`, but operates on new fields with placeholder /// IDs rather than reassigning an existing schema. 
`ReassignFieldIds` cannot be used /// directly here because it rejects duplicate old IDs (all new fields share placeholder -/// ID `DEFAULT_ID`). +/// ID `DEFAULT_FIELD_ID`). fn assign_fresh_ids(field: &NestedField, next_id: &mut i32) -> NestedFieldRef { *next_id += 1; let new_id = *next_id; @@ -444,11 +435,7 @@ impl TransactionAction for UpdateSchemaAction { }; // Assign fresh IDs immediately, preserving insertion order. - let field = if self.auto_assign_ids { - assign_fresh_ids(&pending_field, &mut last_column_id) - } else { - pending_field - }; + let field = assign_fresh_ids(&pending_field, &mut last_column_id); additions_by_parent .entry(parent_id) @@ -491,13 +478,14 @@ mod tests { use as_any::Downcast; use crate::spec::{ - Literal, NestedField, PrimitiveType, Schema, StructType, TableMetadata, Type, + DEFAULT_SCHEMA_ID, Literal, NestedField, PrimitiveType, Schema, StructType, TableMetadata, + Type, }; use crate::table::Table; use crate::transaction::Transaction; use crate::transaction::action::{ApplyTransactionAction, TransactionAction}; use crate::transaction::tests::make_v2_table; - use crate::transaction::update_schema::{AddColumn, DEFAULT_ID, UpdateSchemaAction}; + use crate::transaction::update_schema::{AddColumn, DEFAULT_FIELD_ID, UpdateSchemaAction}; use crate::{ErrorKind, TableIdent, TableRequirement, TableUpdate}; // The V2 test table has: @@ -628,17 +616,16 @@ mod tests { other => panic!("expected AddSchema, got {other:?}"), }; - let mut expected_fields = table + let expected_schema = table .metadata() .current_schema() - .as_struct() - .fields() - .to_vec(); - expected_fields - .push(NestedField::optional(4, "new_col", Type::Primitive(PrimitiveType::Int)).into()); - let expected_schema = Schema::builder() - .with_fields(expected_fields) - .with_identifier_field_ids(table.metadata().current_schema().identifier_field_ids()) + .as_ref() + .clone() + .into_builder() + .with_schema_id(DEFAULT_SCHEMA_ID) + .with_fields([ + NestedField::optional(4, 
"new_col", Type::Primitive(PrimitiveType::Int)).into(), + ]) .build() .unwrap(); assert_eq!(new_schema, &expected_schema); @@ -1113,17 +1100,25 @@ mod tests { #[tokio::test] async fn test_add_nested_struct_type_with_fresh_ids() { // Adding a new column whose TYPE contains nested fields (e.g. a struct column). All sub-fields must receive - // fresh IDs, not placeholder `DEFAULT_ID`. + // fresh IDs, not placeholder `DEFAULT_FIELD_ID`. let table = make_v2_table(); let tx = Transaction::new(&table); let action = tx.update_schema().add_column(AddColumn::optional( "address", Type::Struct(StructType::new(vec![ - NestedField::optional(DEFAULT_ID, "street", Type::Primitive(PrimitiveType::String)) - .into(), - NestedField::optional(DEFAULT_ID, "city", Type::Primitive(PrimitiveType::String)) - .into(), + NestedField::optional( + DEFAULT_FIELD_ID, + "street", + Type::Primitive(PrimitiveType::String), + ) + .into(), + NestedField::optional( + DEFAULT_FIELD_ID, + "city", + Type::Primitive(PrimitiveType::String), + ) + .into(), ])), )); From dcf01302676e5db974a10a60240e9aed0898c27a Mon Sep 17 00:00:00 2001 From: Victor Ghita Date: Mon, 4 May 2026 14:42:15 +0300 Subject: [PATCH 14/15] cargo fmt --- crates/catalog/loader/tests/schema_update_suite.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/crates/catalog/loader/tests/schema_update_suite.rs b/crates/catalog/loader/tests/schema_update_suite.rs index 7338967c76..9421bbf0ee 100644 --- a/crates/catalog/loader/tests/schema_update_suite.rs +++ b/crates/catalog/loader/tests/schema_update_suite.rs @@ -178,9 +178,7 @@ async fn test_catalog_schema_add_nested_and_delete_column(#[case] kind: CatalogK #[case::s3tables_catalog(CatalogKind::S3Tables)] #[case::memory_catalog(CatalogKind::Memory)] #[tokio::test] -async fn test_catalog_schema_delete_invalid_column_errors( - #[case] kind: CatalogKind, -) -> Result<()> { +async fn test_catalog_schema_delete_invalid_column_errors(#[case] kind: CatalogKind) -> Result<()> 
{ let Some(harness) = load_catalog(kind).await else { return Ok(()); }; @@ -216,10 +214,7 @@ async fn test_catalog_schema_delete_invalid_column_errors( // Deleting a nonexistent field must fail. let tx = Transaction::new(&table); - let tx = tx - .update_schema() - .delete_column("nonexistent") - .apply(tx)?; + let tx = tx.update_schema().delete_column("nonexistent").apply(tx)?; let err = tx.commit(catalog.as_ref()).await.unwrap_err(); assert_eq!(err.kind(), ErrorKind::PreconditionFailed); From 11fde33374c1e8ccb39b5958d7fb894799b27b2a Mon Sep 17 00:00:00 2001 From: Victor Ghita Date: Mon, 4 May 2026 15:10:26 +0300 Subject: [PATCH 15/15] fmt and clippy --- crates/iceberg/src/transaction/update_schema.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/iceberg/src/transaction/update_schema.rs b/crates/iceberg/src/transaction/update_schema.rs index a16e10e493..0394dbda39 100644 --- a/crates/iceberg/src/transaction/update_schema.rs +++ b/crates/iceberg/src/transaction/update_schema.rs @@ -478,8 +478,7 @@ mod tests { use as_any::Downcast; use crate::spec::{ - DEFAULT_SCHEMA_ID, Literal, NestedField, PrimitiveType, Schema, StructType, TableMetadata, - Type, + DEFAULT_SCHEMA_ID, Literal, NestedField, PrimitiveType, StructType, TableMetadata, Type, }; use crate::table::Table; use crate::transaction::Transaction;