Skip to content

Commit 85b52a3

Browse files
committed
resolve some comments
1 parent 3409de5 commit 85b52a3

2 files changed

Lines changed: 56 additions & 126 deletions

File tree

src/iceberg/update/update_schema.cc

Lines changed: 11 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
#include <string_view>
2727
#include <unordered_set>
2828
#include <utility>
29-
#include <vector>
3029

3130
#include "iceberg/schema.h"
3231
#include "iceberg/table_metadata.h"
@@ -81,10 +80,6 @@ UpdateSchema& UpdateSchema::CaseSensitive(bool case_sensitive) {
8180
return *this;
8281
}
8382

84-
UpdateSchema& UpdateSchema::AddColumn(std::string_view name, std::shared_ptr<Type> type) {
85-
return AddColumn(name, std::move(type), "");
86-
}
87-
8883
UpdateSchema& UpdateSchema::AddColumn(std::string_view name, std::shared_ptr<Type> type,
8984
std::string_view doc) {
9085
// Check for "." in top-level name
@@ -96,24 +91,13 @@ UpdateSchema& UpdateSchema::AddColumn(std::string_view name, std::shared_ptr<Typ
9691
doc);
9792
}
9893

99-
UpdateSchema& UpdateSchema::AddColumn(std::optional<std::string> parent,
100-
std::string_view name, std::shared_ptr<Type> type) {
101-
return AddColumnInternal(std::move(parent), name, /*is_optional=*/true, std::move(type),
102-
"");
103-
}
104-
105-
UpdateSchema& UpdateSchema::AddColumn(std::optional<std::string> parent,
94+
UpdateSchema& UpdateSchema::AddColumn(std::optional<std::string_view> parent,
10695
std::string_view name, std::shared_ptr<Type> type,
10796
std::string_view doc) {
10897
return AddColumnInternal(std::move(parent), name, /*is_optional=*/true, std::move(type),
10998
doc);
11099
}
111100

112-
UpdateSchema& UpdateSchema::AddRequiredColumn(std::string_view name,
113-
std::shared_ptr<Type> type) {
114-
return AddRequiredColumn(name, std::move(type), "");
115-
}
116-
117101
UpdateSchema& UpdateSchema::AddRequiredColumn(std::string_view name,
118102
std::shared_ptr<Type> type,
119103
std::string_view doc) {
@@ -126,14 +110,14 @@ UpdateSchema& UpdateSchema::AddRequiredColumn(std::string_view name,
126110
doc);
127111
}
128112

129-
UpdateSchema& UpdateSchema::AddRequiredColumn(std::optional<std::string> parent,
113+
UpdateSchema& UpdateSchema::AddRequiredColumn(std::optional<std::string_view> parent,
130114
std::string_view name,
131115
std::shared_ptr<Type> type) {
132116
return AddColumnInternal(std::move(parent), name, /*is_optional=*/false,
133117
std::move(type), "");
134118
}
135119

136-
UpdateSchema& UpdateSchema::AddRequiredColumn(std::optional<std::string> parent,
120+
UpdateSchema& UpdateSchema::AddRequiredColumn(std::optional<std::string_view> parent,
137121
std::string_view name,
138122
std::shared_ptr<Type> type,
139123
std::string_view doc) {
@@ -148,13 +132,14 @@ UpdateSchema& UpdateSchema::UpdateColumn(std::string_view name,
148132
return *this;
149133
}
150134

151-
UpdateSchema& UpdateSchema::UpdateColumn(std::string_view name,
152-
std::shared_ptr<PrimitiveType> new_type,
153-
std::string_view new_doc) {
154-
return UpdateColumn(name, std::move(new_type)).UpdateColumnDoc(name, new_doc);
135+
UpdateSchema& UpdateSchema::UpdateColumnDoc(std::string_view name,
136+
std::string_view new_doc) {
137+
// TODO(Guotao Yu): Implement UpdateColumnDoc
138+
AddError(NotImplemented("UpdateSchema::UpdateColumnDoc not implemented"));
139+
return *this;
155140
}
156141

157-
UpdateSchema& UpdateSchema::AddColumnInternal(std::optional<std::string> parent,
142+
UpdateSchema& UpdateSchema::AddColumnInternal(std::optional<std::string_view> parent,
158143
std::string_view name, bool is_optional,
159144
std::shared_ptr<Type> type,
160145
std::string_view doc) {
@@ -171,13 +156,6 @@ UpdateSchema& UpdateSchema::RenameColumn(std::string_view name,
171156
return *this;
172157
}
173158

174-
UpdateSchema& UpdateSchema::UpdateColumnDoc(std::string_view name,
175-
std::string_view new_doc) {
176-
// TODO(Guotao Yu): Implement UpdateColumnDoc
177-
AddError(NotImplemented("UpdateSchema::UpdateColumnDoc not implemented"));
178-
return *this;
179-
}
180-
181159
UpdateSchema& UpdateSchema::MakeColumnOptional(std::string_view name) {
182160
// TODO(Guotao Yu): Implement MakeColumnOptional
183161
AddError(NotImplemented("UpdateSchema::MakeColumnOptional not implemented"));
@@ -222,7 +200,8 @@ UpdateSchema& UpdateSchema::UnionByNameWith(std::shared_ptr<Schema> new_schema)
222200
return *this;
223201
}
224202

225-
UpdateSchema& UpdateSchema::SetIdentifierFields(const std::vector<std::string>& names) {
203+
UpdateSchema& UpdateSchema::SetIdentifierFields(
204+
const std::span<std::string_view>& names) {
226205
identifier_field_names_ = names | std::ranges::to<std::unordered_set<std::string>>();
227206
return *this;
228207
}

src/iceberg/update/update_schema.h

Lines changed: 45 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@
2424

2525
#include <memory>
2626
#include <optional>
27+
#include <span>
2728
#include <string>
2829
#include <string_view>
2930
#include <unordered_set>
30-
#include <vector>
3131

3232
#include "iceberg/iceberg_export.h"
3333
#include "iceberg/result.h"
@@ -40,6 +40,10 @@ namespace iceberg {
4040
///
4141
/// When committing, these changes will be applied to the current table metadata.
4242
/// Commit conflicts will not be resolved and will result in a CommitFailed error.
43+
///
44+
/// TODO(Guotao Yu): Add support for V3 default values when adding columns. Currently, all
45+
/// added columns use null as the default value, but Iceberg V3 supports custom
46+
/// default values for new columns.
4347
class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
4448
public:
4549
static Result<std::shared_ptr<UpdateSchema>> Make(
@@ -63,24 +67,6 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
6367
/// \return Reference to this for method chaining.
6468
UpdateSchema& AllowIncompatibleChanges();
6569

66-
/// \brief Add a new optional top-level column.
67-
///
68-
/// Because "." may be interpreted as a column path separator or may be used in
69-
/// field names, it is not allowed in names passed to this method. To add to nested
70-
/// structures or to add fields with names that contain ".", use AddColumn(parent,
71-
/// name, type, ...).
72-
///
73-
/// If type is a nested type, its field IDs are reassigned when added to the
74-
/// existing schema.
75-
///
76-
/// The added column will be optional with a null default value.
77-
///
78-
/// \param name Name for the new column.
79-
/// \param type Type for the new column.
80-
/// \return Reference to this for method chaining.
81-
/// \throws InvalidArgument If name contains ".".
82-
UpdateSchema& AddColumn(std::string_view name, std::shared_ptr<Type> type);
83-
8470
/// \brief Add a new optional top-level column with documentation.
8571
///
8672
/// Because "." may be interpreted as a column path separator or may be used in
@@ -97,33 +83,9 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
9783
/// \param type Type for the new column.
9884
/// \param doc Documentation string for the new column.
9985
/// \return Reference to this for method chaining.
100-
/// \throws InvalidArgument If name contains ".".
86+
/// \note InvalidArgument will be reported if name contains ".".
10187
UpdateSchema& AddColumn(std::string_view name, std::shared_ptr<Type> type,
102-
std::string_view doc);
103-
104-
/// \brief Add a new optional column to a nested struct.
105-
///
106-
/// The parent name is used to find the parent using Schema::FindFieldByName(). If
107-
/// the parent name is null or empty, the new column will be added to the root as a
108-
/// top-level column. If parent identifies a struct, a new column is added to that
109-
/// struct. If it identifies a list, the column is added to the list element struct,
110-
/// and if it identifies a map, the new column is added to the map's value struct.
111-
///
112-
/// The given name is used to name the new column and names containing "." are not
113-
/// handled differently.
114-
///
115-
/// If type is a nested type, its field IDs are reassigned when added to the
116-
/// existing schema.
117-
///
118-
/// The added column will be optional with a null default value.
119-
///
120-
/// \param parent Name of the parent struct to which the column will be added.
121-
/// \param name Name for the new column.
122-
/// \param type Type for the new column.
123-
/// \return Reference to this for method chaining.
124-
/// \throws InvalidArgument If parent doesn't identify a struct.
125-
UpdateSchema& AddColumn(std::optional<std::string> parent, std::string_view name,
126-
std::shared_ptr<Type> type);
88+
std::string_view doc = "");
12789

12890
/// \brief Add a new optional column to a nested struct with documentation.
12991
///
@@ -146,9 +108,9 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
146108
/// \param type Type for the new column.
147109
/// \param doc Documentation string for the new column.
148110
/// \return Reference to this for method chaining.
149-
/// \throws InvalidArgument If parent doesn't identify a struct.
150-
UpdateSchema& AddColumn(std::optional<std::string> parent, std::string_view name,
151-
std::shared_ptr<Type> type, std::string_view doc);
111+
/// \note InvalidArgument will be reported if parent doesn't identify a struct.
112+
UpdateSchema& AddColumn(std::optional<std::string_view> parent, std::string_view name,
113+
std::shared_ptr<Type> type, std::string_view doc = "");
152114

153115
/// \brief Add a new required top-level column.
154116
///
@@ -167,7 +129,7 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
167129
/// \param name Name for the new column.
168130
/// \param type Type for the new column.
169131
/// \return Reference to this for method chaining.
170-
/// \throws InvalidArgument If name contains ".".
132+
/// \note InvalidArgument will be reported if name contains ".".
171133
UpdateSchema& AddRequiredColumn(std::string_view name, std::shared_ptr<Type> type);
172134

173135
/// \brief Add a new required top-level column with documentation.
@@ -188,7 +150,7 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
188150
/// \param type Type for the new column.
189151
/// \param doc Documentation string for the new column.
190152
/// \return Reference to this for method chaining.
191-
/// \throws InvalidArgument If name contains ".".
153+
/// \note InvalidArgument will be reported if name contains ".".
192154
UpdateSchema& AddRequiredColumn(std::string_view name, std::shared_ptr<Type> type,
193155
std::string_view doc);
194156

@@ -214,8 +176,8 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
214176
/// \param name Name for the new column.
215177
/// \param type Type for the new column.
216178
/// \return Reference to this for method chaining.
217-
/// \throws InvalidArgument If parent doesn't identify a struct.
218-
UpdateSchema& AddRequiredColumn(std::optional<std::string> parent,
179+
/// \note InvalidArgument will be reported if parent doesn't identify a struct.
180+
UpdateSchema& AddRequiredColumn(std::optional<std::string_view> parent,
219181
std::string_view name, std::shared_ptr<Type> type);
220182

221183
/// \brief Add a new required column to a nested struct with documentation.
@@ -241,8 +203,8 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
241203
/// \param type Type for the new column.
242204
/// \param doc Documentation string for the new column.
243205
/// \return Reference to this for method chaining.
244-
/// \throws InvalidArgument If parent doesn't identify a struct.
245-
UpdateSchema& AddRequiredColumn(std::optional<std::string> parent,
206+
/// \note InvalidArgument will be reported if parent doesn't identify a struct.
207+
UpdateSchema& AddRequiredColumn(std::optional<std::string_view> parent,
246208
std::string_view name, std::shared_ptr<Type> type,
247209
std::string_view doc);
248210

@@ -258,8 +220,9 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
258220
/// \param name Name of the column to rename.
259221
/// \param new_name Replacement name for the column.
260222
/// \return Reference to this for method chaining.
261-
/// \throws InvalidArgument If name doesn't identify a column in the schema or if
262-
/// this change conflicts with other additions, renames, or updates.
223+
/// \note InvalidArgument will be reported if name doesn't identify a column in the
224+
/// schema or if
225+
/// this change conflicts with other additions, renames, or updates.
263226
UpdateSchema& RenameColumn(std::string_view name, std::string_view new_name);
264227

265228
/// \brief Update a column in the schema to a new primitive type.
@@ -273,40 +236,23 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
273236
/// \param name Name of the column to update.
274237
/// \param new_type Replacement type for the column (must be primitive).
275238
/// \return Reference to this for method chaining.
276-
/// \throws InvalidArgument If name doesn't identify a column in the schema or if
277-
/// this change introduces a type incompatibility or if it conflicts with
278-
/// other additions, renames, or updates.
239+
/// \note InvalidArgument will be reported if name doesn't identify a column in the
240+
/// schema or if
241+
/// this change introduces a type incompatibility or if it conflicts with
242+
/// other additions, renames, or updates.
279243
UpdateSchema& UpdateColumn(std::string_view name,
280244
std::shared_ptr<PrimitiveType> new_type);
281245

282-
/// \brief Update a column in the schema to a new primitive type with documentation.
283-
///
284-
/// The name is used to find the column to update using Schema::FindFieldByName().
285-
///
286-
/// Only updates that widen types are allowed.
287-
///
288-
/// Columns may be updated and renamed in the same schema update.
289-
///
290-
/// \param name Name of the column to update.
291-
/// \param new_type Replacement type for the column (must be primitive).
292-
/// \param new_doc Replacement documentation string for the column.
293-
/// \return Reference to this for method chaining.
294-
/// \throws InvalidArgument If name doesn't identify a column in the schema or if
295-
/// this change introduces a type incompatibility or if it conflicts with
296-
/// other additions, renames, or updates.
297-
UpdateSchema& UpdateColumn(std::string_view name,
298-
std::shared_ptr<PrimitiveType> new_type,
299-
std::string_view new_doc);
300-
301246
/// \brief Update the documentation string for a column.
302247
///
303248
/// The name is used to find the column to update using Schema::FindFieldByName().
304249
///
305250
/// \param name Name of the column to update the documentation string for.
306251
/// \param new_doc Replacement documentation string for the column.
307252
/// \return Reference to this for method chaining.
308-
/// \throws InvalidArgument If name doesn't identify a column in the schema or if
309-
/// the column will be deleted.
253+
/// \note InvalidArgument will be reported if name doesn't identify a column in the
254+
/// schema or if
255+
/// the column will be deleted.
310256
UpdateSchema& UpdateColumnDoc(std::string_view name, std::string_view new_doc);
311257

312258
/// \brief Update a column to be optional.
@@ -330,17 +276,19 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
330276
///
331277
/// \param name Name of the column to delete.
332278
/// \return Reference to this for method chaining.
333-
/// \throws InvalidArgument If name doesn't identify a column in the schema or if
334-
/// this change conflicts with other additions, renames, or updates.
279+
/// \note InvalidArgument will be reported if name doesn't identify a column in the
280+
/// schema or if
281+
/// this change conflicts with other additions, renames, or updates.
335282
UpdateSchema& DeleteColumn(std::string_view name);
336283

337284
/// \brief Move a column from its current position to the start of the schema or its
338285
/// parent struct.
339286
///
340287
/// \param name Name of the column to move.
341288
/// \return Reference to this for method chaining.
342-
/// \throws InvalidArgument If name doesn't identify a column in the schema or if
343-
/// this change conflicts with other changes.
289+
/// \note InvalidArgument will be reported if name doesn't identify a column in the
290+
/// schema or if
291+
/// this change conflicts with other changes.
344292
UpdateSchema& MoveFirst(std::string_view name);
345293

346294
/// \brief Move a column from its current position to directly before a reference
@@ -353,8 +301,9 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
353301
/// \param name Name of the column to move.
354302
/// \param before_name Name of the reference column.
355303
/// \return Reference to this for method chaining.
356-
/// \throws InvalidArgument If name doesn't identify a column in the schema or if
357-
/// this change conflicts with other changes.
304+
/// \note InvalidArgument will be reported if name doesn't identify a column in the
305+
/// schema or if
306+
/// this change conflicts with other changes.
358307
UpdateSchema& MoveBefore(std::string_view name, std::string_view before_name);
359308

360309
/// \brief Move a column from its current position to directly after a reference
@@ -367,8 +316,9 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
367316
/// \param name Name of the column to move.
368317
/// \param after_name Name of the reference column.
369318
/// \return Reference to this for method chaining.
370-
/// \throws InvalidArgument If name doesn't identify a column in the schema or if
371-
/// this change conflicts with other changes.
319+
/// \note InvalidArgument will be reported if name doesn't identify a column in the
320+
/// schema or if
321+
/// this change conflicts with other changes.
372322
UpdateSchema& MoveAfter(std::string_view name, std::string_view after_name);
373323

374324
/// \brief Applies all field additions and updates from the provided new schema to
@@ -388,10 +338,11 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
388338
/// \param new_schema A schema used in conjunction with the existing schema to
389339
/// create a union schema.
390340
/// \return Reference to this for method chaining.
391-
/// \throws InvalidState If it encounters errors during provided schema traversal.
392-
/// \throws InvalidArgument If name doesn't identify a column in the schema or if
393-
/// this change introduces a type incompatibility or if it conflicts with
394-
/// other additions, renames, or updates.
341+
/// \note InvalidState will be reported if it encounters errors during provided schema
342+
/// traversal. \note InvalidArgument will be reported if name doesn't identify a column
343+
/// in the schema or if
344+
/// this change introduces a type incompatibility or if it conflicts with
345+
/// other additions, renames, or updates.
395346
UpdateSchema& UnionByNameWith(std::shared_ptr<Schema> new_schema);
396347

397348
/// \brief Set the identifier fields given a set of field names.
@@ -401,7 +352,7 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
401352
///
402353
/// \param names Names of the columns to set as identifier fields.
403354
/// \return Reference to this for method chaining.
404-
UpdateSchema& SetIdentifierFields(const std::vector<std::string>& names);
355+
UpdateSchema& SetIdentifierFields(const std::span<std::string_view>& names);
405356

406357
/// \brief Determines if the case of schema needs to be considered when comparing
407358
/// column names.
@@ -436,7 +387,7 @@ class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
436387
/// \param type Type for the new column.
437388
/// \param doc Optional documentation string.
438389
/// \return Reference to this for method chaining.
439-
UpdateSchema& AddColumnInternal(std::optional<std::string> parent,
390+
UpdateSchema& AddColumnInternal(std::optional<std::string_view> parent,
440391
std::string_view name, bool is_optional,
441392
std::shared_ptr<Type> type, std::string_view doc);
442393

0 commit comments

Comments
 (0)