Skip to content

Commit 4c3a975

Browse files
committed
fix the case sensitivity bug
1 parent 822e081 commit 4c3a975

File tree

3 files changed

+99
-2
lines changed

3 files changed

+99
-2
lines changed

src/iceberg/test/update_schema_test.cc

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1456,6 +1456,85 @@ TEST_F(UpdateSchemaTest, DeleteRenameConflict) {
14561456
EXPECT_THAT(result, HasErrorMessage("Cannot rename a column that will be deleted"));
14571457
}
14581458

1459+
// Test case insensitive add then update
1460+
TEST_F(UpdateSchemaTest, CaseInsensitiveAddThenUpdate) {
1461+
ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema());
1462+
update->CaseSensitive(false)
1463+
.AddColumn("Foo", int32(), "A column with uppercase name")
1464+
.UpdateColumn("foo", int64()); // Update using lowercase
1465+
1466+
ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply());
1467+
1468+
// Verify the column was added and updated
1469+
ICEBERG_UNWRAP_OR_FAIL(auto field_opt, result.schema->FindFieldByName("Foo", false));
1470+
ASSERT_TRUE(field_opt.has_value());
1471+
EXPECT_EQ(*field_opt->get().type(), *int64()); // Type should be updated to int64
1472+
}
1473+
1474+
// Test case insensitive add then rename
1475+
TEST_F(UpdateSchemaTest, CaseInsensitiveAddThenRename) {
1476+
ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema());
1477+
update->CaseSensitive(false)
1478+
.AddColumn("Foo", string(), "A column with uppercase name")
1479+
.RenameColumn("foo", "Bar"); // Rename using lowercase
1480+
1481+
ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply());
1482+
1483+
// Verify the column was renamed
1484+
ICEBERG_UNWRAP_OR_FAIL(auto foo_opt, result.schema->FindFieldByName("Foo", false));
1485+
ICEBERG_UNWRAP_OR_FAIL(auto bar_opt, result.schema->FindFieldByName("Bar", false));
1486+
EXPECT_FALSE(foo_opt.has_value());
1487+
ASSERT_TRUE(bar_opt.has_value());
1488+
EXPECT_EQ(*bar_opt->get().type(), *string());
1489+
}
1490+
1491+
// Test case insensitive add then update doc
1492+
TEST_F(UpdateSchemaTest, CaseInsensitiveAddThenUpdateDoc) {
1493+
ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema());
1494+
update->CaseSensitive(false)
1495+
.AddColumn("Foo", int32(), "original doc")
1496+
.UpdateColumnDoc("foo", "updated doc"); // Update doc using lowercase
1497+
1498+
ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply());
1499+
1500+
// Verify the doc was updated
1501+
ICEBERG_UNWRAP_OR_FAIL(auto field_opt, result.schema->FindFieldByName("Foo", false));
1502+
ASSERT_TRUE(field_opt.has_value());
1503+
EXPECT_EQ(field_opt->get().doc(), "updated doc");
1504+
}
1505+
1506+
// Test case insensitive add then make optional
1507+
TEST_F(UpdateSchemaTest, CaseInsensitiveAddThenMakeOptional) {
1508+
ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema());
1509+
update->CaseSensitive(false)
1510+
.AllowIncompatibleChanges()
1511+
.AddRequiredColumn("Foo", int32(), "required column")
1512+
.MakeColumnOptional("foo"); // Make optional using lowercase
1513+
1514+
ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply());
1515+
1516+
// Verify the column is now optional
1517+
ICEBERG_UNWRAP_OR_FAIL(auto field_opt, result.schema->FindFieldByName("Foo", false));
1518+
ASSERT_TRUE(field_opt.has_value());
1519+
EXPECT_TRUE(field_opt->get().optional());
1520+
}
1521+
1522+
// Test case insensitive add then require
1523+
TEST_F(UpdateSchemaTest, CaseInsensitiveAddThenRequire) {
1524+
ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema());
1525+
update->CaseSensitive(false)
1526+
.AllowIncompatibleChanges()
1527+
.AddColumn("Foo", int32(), "optional column")
1528+
.RequireColumn("foo"); // Require using lowercase
1529+
1530+
ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply());
1531+
1532+
// Verify the column is now required
1533+
ICEBERG_UNWRAP_OR_FAIL(auto field_opt, result.schema->FindFieldByName("Foo", false));
1534+
ASSERT_TRUE(field_opt.has_value());
1535+
EXPECT_FALSE(field_opt->get().optional());
1536+
}
1537+
14591538
// Test mixed changes - comprehensive test combining multiple operations
14601539
TEST_F(UpdateSchemaTest, MixedChanges) {
14611540
// First, create a complex schema similar to Java's SCHEMA constant

src/iceberg/update/update_schema.cc

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "iceberg/util/checked_cast.h"
3838
#include "iceberg/util/error_collector.h"
3939
#include "iceberg/util/macros.h"
40+
#include "iceberg/util/string_util.h"
4041
#include "iceberg/util/type_util.h"
4142
#include "iceberg/util/visit_type.h"
4243

@@ -716,7 +717,7 @@ UpdateSchema& UpdateSchema::AddColumnInternal(std::optional<std::string_view> pa
716717
int32_t new_id = AssignNewColumnId();
717718

718719
// Update tracking for moves
719-
added_name_to_id_[full_name] = new_id;
720+
added_name_to_id_[CaseSensitivityAwareName(full_name)] = new_id;
720721
if (parent_id != kTableRootId) {
721722
id_to_parent_[new_id] = parent_id;
722723
}
@@ -769,7 +770,7 @@ UpdateSchema::FindFieldForUpdate(std::string_view name) const {
769770
}
770771

771772
// Field not in original schema (or is being deleted), check if it's a newly added field
772-
auto added_it = added_name_to_id_.find(std::string(name));
773+
auto added_it = added_name_to_id_.find(CaseSensitivityAwareName(name));
773774
if (added_it != added_name_to_id_.end()) {
774775
int32_t added_id = added_it->second;
775776
auto update_it = updates_.find(added_id);
@@ -783,4 +784,12 @@ UpdateSchema::FindFieldForUpdate(std::string_view name) const {
783784
return std::nullopt;
784785
}
785786

787+
std::string UpdateSchema::CaseSensitivityAwareName(std::string_view name) const {
788+
if (case_sensitive_) {
789+
return std::string(name);
790+
}
791+
// Convert to lowercase for case-insensitive comparison
792+
return StringUtils::ToLower(name);
793+
}
794+
786795
} // namespace iceberg

src/iceberg/update/update_schema.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,15 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
371371
Result<std::optional<std::reference_wrapper<const SchemaField>>> FindFieldForUpdate(
372372
std::string_view name) const;
373373

374+
/// \brief Normalize a field name based on case sensitivity setting.
375+
///
376+
/// If case_sensitive_ is true, returns the name as-is.
377+
/// If case_sensitive_ is false, returns the lowercase version of the name.
378+
///
379+
/// \param name The field name to normalize.
380+
/// \return The normalized field name.
381+
std::string CaseSensitivityAwareName(std::string_view name) const;
382+
374383
// Internal state
375384
std::shared_ptr<Schema> schema_;
376385
int32_t last_column_id_;

0 commit comments

Comments
 (0)