Skip to content

Commit 3409de5

Browse files
committed
feat: Add UpdateSchema interface skeleton
- Define `UpdateSchema` class interface with full API documentation
1 parent ba92781 commit 3409de5

11 files changed

Lines changed: 728 additions & 0 deletions

File tree

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ set(ICEBERG_SOURCES
8282
update/pending_update.cc
8383
update/update_partition_spec.cc
8484
update/update_properties.cc
85+
update/update_schema.cc
8586
update/update_sort_order.cc
8687
util/bucket_util.cc
8788
util/content_file_util.cc

src/iceberg/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ iceberg_sources = files(
103103
'update/pending_update.cc',
104104
'update/update_partition_spec.cc',
105105
'update/update_properties.cc',
106+
'update/update_schema.cc',
106107
'update/update_sort_order.cc',
107108
'util/bucket_util.cc',
108109
'util/content_file_util.cc',

src/iceberg/table.cc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "iceberg/transaction.h"
3333
#include "iceberg/update/update_partition_spec.h"
3434
#include "iceberg/update/update_properties.h"
35+
#include "iceberg/update/update_schema.h"
3536
#include "iceberg/util/macros.h"
3637

3738
namespace iceberg {
@@ -171,6 +172,13 @@ Result<std::shared_ptr<UpdateSortOrder>> Table::NewUpdateSortOrder() {
171172
return transaction->NewUpdateSortOrder();
172173
}
173174

175+
Result<std::shared_ptr<UpdateSchema>> Table::NewUpdateSchema() {
176+
ICEBERG_ASSIGN_OR_RAISE(
177+
auto transaction, Transaction::Make(shared_from_this(), Transaction::Kind::kUpdate,
178+
/*auto_commit=*/true));
179+
return transaction->NewUpdateSchema();
180+
}
181+
174182
Result<std::shared_ptr<StagedTable>> StagedTable::Make(
175183
TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata,
176184
std::string metadata_location, std::shared_ptr<FileIO> io,
@@ -221,4 +229,8 @@ Result<std::shared_ptr<UpdateProperties>> StaticTable::NewUpdateProperties() {
221229
return NotSupported("Cannot create an update properties for a static table");
222230
}
223231

232+
Result<std::shared_ptr<UpdateSchema>> StaticTable::NewUpdateSchema() {
233+
return NotSupported("Cannot create an update schema for a static table");
234+
}
235+
224236
} // namespace iceberg

src/iceberg/table.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ class ICEBERG_EXPORT Table : public std::enable_shared_from_this<Table> {
140140
/// changes.
141141
virtual Result<std::shared_ptr<UpdateSortOrder>> NewUpdateSortOrder();
142142

143+
/// \brief Create a new UpdateSchema to alter the columns of this table and commit the
144+
/// changes.
145+
virtual Result<std::shared_ptr<UpdateSchema>> NewUpdateSchema();
146+
143147
protected:
144148
Table(TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata,
145149
std::string metadata_location, std::shared_ptr<FileIO> io,
@@ -187,6 +191,8 @@ class ICEBERG_EXPORT StaticTable final : public Table {
187191

188192
Result<std::shared_ptr<UpdateProperties>> NewUpdateProperties() override;
189193

194+
Result<std::shared_ptr<UpdateSchema>> NewUpdateSchema() override;
195+
190196
private:
191197
using Table::Table;
192198
};

src/iceberg/transaction.cc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <memory>
2323

2424
#include "iceberg/catalog.h"
25+
#include "iceberg/schema.h"
2526
#include "iceberg/table.h"
2627
#include "iceberg/table_metadata.h"
2728
#include "iceberg/table_requirement.h"
@@ -30,6 +31,7 @@
3031
#include "iceberg/update/pending_update.h"
3132
#include "iceberg/update/update_partition_spec.h"
3233
#include "iceberg/update/update_properties.h"
34+
#include "iceberg/update/update_schema.h"
3335
#include "iceberg/update/update_sort_order.h"
3436
#include "iceberg/util/checked_cast.h"
3537
#include "iceberg/util/macros.h"
@@ -105,6 +107,12 @@ Status Transaction::Apply(PendingUpdate& update) {
105107
metadata_builder_->AddPartitionSpec(std::move(result.spec));
106108
}
107109
} break;
110+
case PendingUpdate::Kind::kUpdateSchema: {
111+
auto& update_schema = internal::checked_cast<UpdateSchema&>(update);
112+
ICEBERG_ASSIGN_OR_RAISE(auto result, update_schema.Apply());
113+
metadata_builder_->SetCurrentSchema(std::move(result.schema),
114+
result.new_last_column_id);
115+
} break;
108116
default:
109117
return NotSupported("Unsupported pending update: {}",
110118
static_cast<int32_t>(update.kind()));
@@ -178,4 +186,11 @@ Result<std::shared_ptr<UpdateSortOrder>> Transaction::NewUpdateSortOrder() {
178186
return update_sort_order;
179187
}
180188

189+
Result<std::shared_ptr<UpdateSchema>> Transaction::NewUpdateSchema() {
190+
ICEBERG_ASSIGN_OR_RAISE(std::shared_ptr<UpdateSchema> update_schema,
191+
UpdateSchema::Make(shared_from_this()));
192+
ICEBERG_RETURN_UNEXPECTED(AddUpdate(update_schema));
193+
return update_schema;
194+
}
195+
181196
} // namespace iceberg

src/iceberg/transaction.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ class ICEBERG_EXPORT Transaction : public std::enable_shared_from_this<Transacti
6868
/// changes.
6969
Result<std::shared_ptr<UpdateSortOrder>> NewUpdateSortOrder();
7070

71+
/// \brief Create a new UpdateSchema to alter the columns of this table and commit the
72+
/// changes.
73+
Result<std::shared_ptr<UpdateSchema>> NewUpdateSchema();
74+
7175
private:
7276
Transaction(std::shared_ptr<Table> table, Kind kind, bool auto_commit,
7377
std::unique_ptr<TableMetadataBuilder> metadata_builder);

src/iceberg/type_fwd.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ class Transaction;
182182
class PendingUpdate;
183183
class UpdatePartitionSpec;
184184
class UpdateProperties;
185+
class UpdateSchema;
185186
class UpdateSortOrder;
186187

187188
/// ----------------------------------------------------------------------------

src/iceberg/update/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ install_headers(
1919
[
2020
'pending_update.h',
2121
'update_partition_spec.h',
22+
'update_schema.h',
2223
'update_sort_order.h',
2324
'update_properties.h',
2425
],

src/iceberg/update/pending_update.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ class ICEBERG_EXPORT PendingUpdate : public ErrorCollector {
4444
enum class Kind : uint8_t {
4545
kUpdatePartitionSpec,
4646
kUpdateProperties,
47+
kUpdateSchema,
4748
kUpdateSortOrder,
4849
};
4950

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/update/update_schema.h"
21+
22+
#include <memory>
23+
#include <optional>
24+
#include <ranges>
25+
#include <string>
26+
#include <string_view>
27+
#include <unordered_set>
28+
#include <utility>
29+
#include <vector>
30+
31+
#include "iceberg/schema.h"
32+
#include "iceberg/table_metadata.h"
33+
#include "iceberg/transaction.h"
34+
#include "iceberg/type.h"
35+
#include "iceberg/util/error_collector.h"
36+
#include "iceberg/util/macros.h"
37+
38+
namespace iceberg {
39+
40+
Result<std::shared_ptr<UpdateSchema>> UpdateSchema::Make(
41+
std::shared_ptr<Transaction> transaction) {
42+
ICEBERG_PRECHECK(transaction != nullptr,
43+
"Cannot create UpdateSchema without transaction");
44+
return std::shared_ptr<UpdateSchema>(new UpdateSchema(std::move(transaction)));
45+
}
46+
47+
UpdateSchema::UpdateSchema(std::shared_ptr<Transaction> transaction)
48+
: PendingUpdate(std::move(transaction)) {
49+
const TableMetadata& base_metadata = transaction_->current();
50+
51+
// Get the current schema
52+
auto schema_result = base_metadata.Schema();
53+
if (!schema_result.has_value()) {
54+
AddError(schema_result.error());
55+
return;
56+
}
57+
schema_ = std::move(schema_result.value());
58+
59+
// Initialize last_column_id from base metadata
60+
last_column_id_ = base_metadata.last_column_id;
61+
62+
// Initialize identifier field names from the current schema
63+
auto identifier_names_result = schema_->IdentifierFieldNames();
64+
if (!identifier_names_result.has_value()) {
65+
AddError(identifier_names_result.error());
66+
return;
67+
}
68+
identifier_field_names_ = identifier_names_result.value() |
69+
std::ranges::to<std::unordered_set<std::string>>();
70+
}
71+
72+
UpdateSchema::~UpdateSchema() = default;
73+
74+
UpdateSchema& UpdateSchema::AllowIncompatibleChanges() {
75+
allow_incompatible_changes_ = true;
76+
return *this;
77+
}
78+
79+
UpdateSchema& UpdateSchema::CaseSensitive(bool case_sensitive) {
80+
case_sensitive_ = case_sensitive;
81+
return *this;
82+
}
83+
84+
UpdateSchema& UpdateSchema::AddColumn(std::string_view name, std::shared_ptr<Type> type) {
85+
return AddColumn(name, std::move(type), "");
86+
}
87+
88+
UpdateSchema& UpdateSchema::AddColumn(std::string_view name, std::shared_ptr<Type> type,
89+
std::string_view doc) {
90+
// Check for "." in top-level name
91+
ICEBERG_BUILDER_CHECK(!name.contains('.'),
92+
"Cannot add column with ambiguous name: {}, use "
93+
"AddColumn(parent, name, type, doc)",
94+
name);
95+
return AddColumnInternal(std::nullopt, name, /*is_optional=*/true, std::move(type),
96+
doc);
97+
}
98+
99+
UpdateSchema& UpdateSchema::AddColumn(std::optional<std::string> parent,
100+
std::string_view name, std::shared_ptr<Type> type) {
101+
return AddColumnInternal(std::move(parent), name, /*is_optional=*/true, std::move(type),
102+
"");
103+
}
104+
105+
UpdateSchema& UpdateSchema::AddColumn(std::optional<std::string> parent,
106+
std::string_view name, std::shared_ptr<Type> type,
107+
std::string_view doc) {
108+
return AddColumnInternal(std::move(parent), name, /*is_optional=*/true, std::move(type),
109+
doc);
110+
}
111+
112+
UpdateSchema& UpdateSchema::AddRequiredColumn(std::string_view name,
113+
std::shared_ptr<Type> type) {
114+
return AddRequiredColumn(name, std::move(type), "");
115+
}
116+
117+
UpdateSchema& UpdateSchema::AddRequiredColumn(std::string_view name,
118+
std::shared_ptr<Type> type,
119+
std::string_view doc) {
120+
// Check for "." in top-level name
121+
ICEBERG_BUILDER_CHECK(!name.contains('.'),
122+
"Cannot add column with ambiguous name: {}, use "
123+
"AddRequiredColumn(parent, name, type, doc)",
124+
name);
125+
return AddColumnInternal(std::nullopt, name, /*is_optional=*/false, std::move(type),
126+
doc);
127+
}
128+
129+
UpdateSchema& UpdateSchema::AddRequiredColumn(std::optional<std::string> parent,
130+
std::string_view name,
131+
std::shared_ptr<Type> type) {
132+
return AddColumnInternal(std::move(parent), name, /*is_optional=*/false,
133+
std::move(type), "");
134+
}
135+
136+
UpdateSchema& UpdateSchema::AddRequiredColumn(std::optional<std::string> parent,
137+
std::string_view name,
138+
std::shared_ptr<Type> type,
139+
std::string_view doc) {
140+
return AddColumnInternal(std::move(parent), name, /*is_optional=*/false,
141+
std::move(type), doc);
142+
}
143+
144+
UpdateSchema& UpdateSchema::UpdateColumn(std::string_view name,
145+
std::shared_ptr<PrimitiveType> new_type) {
146+
// TODO(Guotao Yu): Implement UpdateColumn
147+
AddError(NotImplemented("UpdateSchema::UpdateColumn not implemented"));
148+
return *this;
149+
}
150+
151+
UpdateSchema& UpdateSchema::UpdateColumn(std::string_view name,
152+
std::shared_ptr<PrimitiveType> new_type,
153+
std::string_view new_doc) {
154+
return UpdateColumn(name, std::move(new_type)).UpdateColumnDoc(name, new_doc);
155+
}
156+
157+
UpdateSchema& UpdateSchema::AddColumnInternal(std::optional<std::string> parent,
158+
std::string_view name, bool is_optional,
159+
std::shared_ptr<Type> type,
160+
std::string_view doc) {
161+
// TODO(Guotao Yu): Implement AddColumnInternal logic
162+
// This is where the real work happens - finding parent, validating, etc.
163+
AddError(NotImplemented("UpdateSchema::AddColumnInternal not implemented"));
164+
return *this;
165+
}
166+
167+
UpdateSchema& UpdateSchema::RenameColumn(std::string_view name,
168+
std::string_view new_name) {
169+
// TODO(Guotao Yu): Implement RenameColumn
170+
AddError(NotImplemented("UpdateSchema::RenameColumn not implemented"));
171+
return *this;
172+
}
173+
174+
UpdateSchema& UpdateSchema::UpdateColumnDoc(std::string_view name,
175+
std::string_view new_doc) {
176+
// TODO(Guotao Yu): Implement UpdateColumnDoc
177+
AddError(NotImplemented("UpdateSchema::UpdateColumnDoc not implemented"));
178+
return *this;
179+
}
180+
181+
UpdateSchema& UpdateSchema::MakeColumnOptional(std::string_view name) {
182+
// TODO(Guotao Yu): Implement MakeColumnOptional
183+
AddError(NotImplemented("UpdateSchema::MakeColumnOptional not implemented"));
184+
return *this;
185+
}
186+
187+
UpdateSchema& UpdateSchema::RequireColumn(std::string_view name) {
188+
// TODO(Guotao Yu): Implement RequireColumn
189+
AddError(NotImplemented("UpdateSchema::RequireColumn not implemented"));
190+
return *this;
191+
}
192+
193+
UpdateSchema& UpdateSchema::DeleteColumn(std::string_view name) {
194+
// TODO(Guotao Yu): Implement DeleteColumn
195+
AddError(NotImplemented("UpdateSchema::DeleteColumn not implemented"));
196+
return *this;
197+
}
198+
199+
UpdateSchema& UpdateSchema::MoveFirst(std::string_view name) {
200+
// TODO(Guotao Yu): Implement MoveFirst
201+
AddError(NotImplemented("UpdateSchema::MoveFirst not implemented"));
202+
return *this;
203+
}
204+
205+
UpdateSchema& UpdateSchema::MoveBefore(std::string_view name,
206+
std::string_view before_name) {
207+
// TODO(Guotao Yu): Implement MoveBefore
208+
AddError(NotImplemented("UpdateSchema::MoveBefore not implemented"));
209+
return *this;
210+
}
211+
212+
UpdateSchema& UpdateSchema::MoveAfter(std::string_view name,
213+
std::string_view after_name) {
214+
// TODO(Guotao Yu): Implement MoveAfter
215+
AddError(NotImplemented("UpdateSchema::MoveAfter not implemented"));
216+
return *this;
217+
}
218+
219+
UpdateSchema& UpdateSchema::UnionByNameWith(std::shared_ptr<Schema> new_schema) {
220+
// TODO(Guotao Yu): Implement UnionByNameWith
221+
AddError(NotImplemented("UpdateSchema::UnionByNameWith not implemented"));
222+
return *this;
223+
}
224+
225+
UpdateSchema& UpdateSchema::SetIdentifierFields(const std::vector<std::string>& names) {
226+
identifier_field_names_ = names | std::ranges::to<std::unordered_set<std::string>>();
227+
return *this;
228+
}
229+
230+
Result<UpdateSchema::ApplyResult> UpdateSchema::Apply() {
231+
// TODO(Guotao Yu): Implement Apply
232+
return NotImplemented("UpdateSchema::Apply not implemented");
233+
}
234+
235+
} // namespace iceberg

0 commit comments

Comments
 (0)