Skip to content

Commit 1f42a1d

Browse files
committed
attempt to fix some inconsistency and simplify logic
1 parent a2ef75b commit 1f42a1d

16 files changed

+381
-447
lines changed

src/iceberg/json_internal.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ nlohmann::json ToJson(const Snapshot& snapshot) {
399399
json[kTimestampMs] = UnixMsFromTimePointMs(snapshot.timestamp_ms);
400400
json[kManifestList] = snapshot.manifest_list;
401401
// If there is an operation, write the summary map
402-
if (snapshot.operation().has_value()) {
402+
if (snapshot.Operation().has_value()) {
403403
json[kSummary] = snapshot.summary;
404404
}
405405
SetOptionalField(json, kSchemaId, snapshot.schema_id);

src/iceberg/snapshot.cc

Lines changed: 59 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
#include "iceberg/snapshot.h"
2121

22+
#include <memory>
23+
2224
#include "iceberg/file_io.h"
2325
#include "iceberg/manifest/manifest_list.h"
2426
#include "iceberg/manifest/manifest_reader.h"
@@ -50,67 +52,44 @@ SnapshotRefType SnapshotRef::type() const noexcept {
5052
retention);
5153
}
5254

53-
Status SnapshotRef::MinSnapshotsToKeep(std::optional<int32_t> value) {
54-
ICEBERG_PRECHECK(this->type() != SnapshotRefType::kTag,
55-
"Tags do not support setting minSnapshotsToKeep");
56-
ICEBERG_PRECHECK(!value.has_value() || value.value() > 0,
57-
"Min snapshots to keep must be greater than 0");
58-
std::get<Branch>(this->retention).min_snapshots_to_keep = value;
59-
return {};
60-
}
61-
62-
Status SnapshotRef::MaxSnapshotAgeMs(std::optional<int64_t> value) {
63-
ICEBERG_PRECHECK(this->type() != SnapshotRefType::kTag,
64-
"Tags do not support setting maxSnapshotAgeMs");
65-
ICEBERG_PRECHECK(!value.has_value() || value.value() > 0,
66-
"Max snapshot age must be greater than 0 ms");
67-
std::get<Branch>(this->retention).max_snapshot_age_ms = value;
68-
return {};
69-
}
70-
71-
Status SnapshotRef::MaxRefAgeMs(std::optional<int64_t> value) {
72-
ICEBERG_PRECHECK(!value.has_value() || value.value() > 0,
73-
"Max reference age must be greater than 0");
74-
if (this->type() == SnapshotRefType::kBranch) {
75-
std::get<Branch>(this->retention).max_ref_age_ms = value;
55+
Status SnapshotRef::Validate() const {
56+
if (type() == SnapshotRefType::kBranch) {
57+
const auto& branch = std::get<Branch>(this->retention);
58+
ICEBERG_CHECK(!branch.min_snapshots_to_keep.has_value() ||
59+
branch.min_snapshots_to_keep.value() > 0,
60+
"Min snapshots to keep must be greater than 0");
61+
ICEBERG_CHECK(
62+
!branch.max_snapshot_age_ms.has_value() || branch.max_snapshot_age_ms.value() > 0,
63+
"Max snapshot age must be greater than 0 ms");
64+
ICEBERG_CHECK(!branch.max_ref_age_ms.has_value() || branch.max_ref_age_ms.value() > 0,
65+
"Max reference age must be greater than 0");
7666
} else {
77-
std::get<Tag>(this->retention).max_ref_age_ms = value;
67+
const auto& tag = std::get<Tag>(this->retention);
68+
ICEBERG_CHECK(!tag.max_ref_age_ms.has_value() || tag.max_ref_age_ms.value() > 0,
69+
"Max reference age must be greater than 0");
7870
}
7971
return {};
8072
}
8173

8274
Result<std::unique_ptr<SnapshotRef>> SnapshotRef::MakeBranch(
8375
int64_t snapshot_id, std::optional<int32_t> min_snapshots_to_keep,
8476
std::optional<int64_t> max_snapshot_age_ms, std::optional<int64_t> max_ref_age_ms) {
85-
// Validate optional parameters
86-
if (min_snapshots_to_keep.has_value() && min_snapshots_to_keep.value() <= 0) {
87-
return InvalidArgument("Min snapshots to keep must be greater than 0");
88-
}
89-
if (max_snapshot_age_ms.has_value() && max_snapshot_age_ms.value() <= 0) {
90-
return InvalidArgument("Max snapshot age must be greater than 0 ms");
91-
}
92-
if (max_ref_age_ms.has_value() && max_ref_age_ms.value() <= 0) {
93-
return InvalidArgument("Max reference age must be greater than 0");
94-
}
95-
96-
auto ref = std::make_unique<SnapshotRef>();
97-
ref->snapshot_id = snapshot_id;
98-
ref->retention = Branch{.min_snapshots_to_keep = min_snapshots_to_keep,
99-
.max_snapshot_age_ms = max_snapshot_age_ms,
100-
.max_ref_age_ms = max_ref_age_ms};
77+
auto ref = std::make_unique<SnapshotRef>(
78+
SnapshotRef{.snapshot_id = snapshot_id,
79+
.retention = Branch{
80+
.min_snapshots_to_keep = min_snapshots_to_keep,
81+
.max_snapshot_age_ms = max_snapshot_age_ms,
82+
.max_ref_age_ms = max_ref_age_ms,
83+
}});
84+
ICEBERG_RETURN_UNEXPECTED(ref->Validate());
10185
return ref;
10286
}
10387

10488
Result<std::unique_ptr<SnapshotRef>> SnapshotRef::MakeTag(
10589
int64_t snapshot_id, std::optional<int64_t> max_ref_age_ms) {
106-
// Validate optional parameter
107-
if (max_ref_age_ms.has_value() && max_ref_age_ms.value() <= 0) {
108-
return InvalidArgument("Max reference age must be greater than 0");
109-
}
110-
111-
auto ref = std::make_unique<SnapshotRef>();
112-
ref->snapshot_id = snapshot_id;
113-
ref->retention = Tag{.max_ref_age_ms = max_ref_age_ms};
90+
auto ref = std::make_unique<SnapshotRef>(SnapshotRef{
91+
.snapshot_id = snapshot_id, .retention = Tag{.max_ref_age_ms = max_ref_age_ms}});
92+
ICEBERG_RETURN_UNEXPECTED(ref->Validate());
11493
return ref;
11594
}
11695

@@ -140,7 +119,7 @@ bool SnapshotRef::Equals(const SnapshotRef& other) const {
140119
}
141120
}
142121

143-
std::optional<std::string_view> Snapshot::operation() const {
122+
std::optional<std::string_view> Snapshot::Operation() const {
144123
auto it = summary.find(SnapshotSummaryFields::kOperation);
145124
if (it != summary.end()) {
146125
return it->second;
@@ -176,6 +155,37 @@ bool Snapshot::Equals(const Snapshot& other) const {
176155
schema_id == other.schema_id;
177156
}
178157

158+
Result<std::unique_ptr<Snapshot>> Snapshot::Make(
159+
int64_t sequence_number, int64_t snapshot_id,
160+
std::optional<int64_t> parent_snapshot_id, TimePointMs timestamp_ms,
161+
std::string operation, std::unordered_map<std::string, std::string> summary,
162+
std::optional<int32_t> schema_id, std::string manifest_list,
163+
std::optional<int64_t> first_row_id, std::optional<int64_t> added_rows) {
164+
ICEBERG_PRECHECK(!operation.empty(), "Operation cannot be empty");
165+
ICEBERG_PRECHECK(!first_row_id.has_value() || first_row_id.value() >= 0,
166+
"Invalid first-row-id (cannot be negative): {}", first_row_id.value());
167+
ICEBERG_PRECHECK(!added_rows.has_value() || added_rows.value() >= 0,
168+
"Invalid added-rows (cannot be negative): {}", added_rows.value());
169+
ICEBERG_PRECHECK(!first_row_id.has_value() || added_rows.has_value(),
170+
"Missing added-rows when first-row-id is set");
171+
summary[SnapshotSummaryFields::kOperation] = operation;
172+
if (first_row_id.has_value()) {
173+
summary[SnapshotSummaryFields::kFirstRowId] = std::to_string(first_row_id.value());
174+
}
175+
if (added_rows.has_value()) {
176+
summary[SnapshotSummaryFields::kAddedRows] = std::to_string(added_rows.value());
177+
}
178+
return std::make_unique<Snapshot>(Snapshot{
179+
.snapshot_id = snapshot_id,
180+
.parent_snapshot_id = parent_snapshot_id,
181+
.sequence_number = sequence_number,
182+
.timestamp_ms = timestamp_ms,
183+
.manifest_list = std::move(manifest_list),
184+
.summary = std::move(summary),
185+
.schema_id = schema_id,
186+
});
187+
}
188+
179189
Result<SnapshotCache::ManifestsCache> SnapshotCache::InitManifestsCache(
180190
const Snapshot* snapshot, std::shared_ptr<FileIO> file_io) {
181191
if (file_io == nullptr) {

src/iceberg/snapshot.h

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -113,21 +113,6 @@ struct ICEBERG_EXPORT SnapshotRef {
113113

114114
SnapshotRefType type() const noexcept;
115115

116-
/// \brief Set the minimum number of snapshots to keep (branch only)
117-
/// \param value The minimum number of snapshots to keep, or nullopt for default
118-
/// \return Status indicating success or failure
119-
Status MinSnapshotsToKeep(std::optional<int32_t> value);
120-
121-
/// \brief Set the maximum snapshot age in milliseconds (branch only)
122-
/// \param value The maximum snapshot age in milliseconds, or nullopt for default
123-
/// \return Status indicating success or failure
124-
Status MaxSnapshotAgeMs(std::optional<int64_t> value);
125-
126-
/// \brief Set the maximum reference age in milliseconds
127-
/// \param value The maximum reference age in milliseconds, or nullopt for default
128-
/// \return Status indicating success or failure
129-
Status MaxRefAgeMs(std::optional<int64_t> value);
130-
131116
/// \brief Create a branch reference
132117
///
133118
/// \param snapshot_id The snapshot ID for the branch
@@ -158,6 +143,9 @@ struct ICEBERG_EXPORT SnapshotRef {
158143
std::unique_ptr<SnapshotRef> Clone(
159144
std::optional<int64_t> new_snapshot_id = std::nullopt) const;
160145

146+
/// \brief Validate the SnapshotRef
147+
Status Validate() const;
148+
161149
/// \brief Compare two snapshot refs for equality
162150
friend bool operator==(const SnapshotRef& lhs, const SnapshotRef& rhs) {
163151
return lhs.Equals(rhs);
@@ -169,13 +157,11 @@ struct ICEBERG_EXPORT SnapshotRef {
169157
};
170158

171159
/// \brief Optional Snapshot Summary Fields
172-
struct SnapshotSummaryFields {
160+
struct ICEBERG_EXPORT SnapshotSummaryFields {
173161
/// \brief The operation field key
174162
inline static const std::string kOperation = "operation";
175-
176163
/// \brief The first row id field key
177164
inline static const std::string kFirstRowId = "first-row-id";
178-
179165
/// \brief The added rows field key
180166
inline static const std::string kAddedRows = "added-rows";
181167

@@ -296,12 +282,21 @@ struct ICEBERG_EXPORT Snapshot {
296282
/// ID of the table's current schema when the snapshot was created.
297283
std::optional<int32_t> schema_id;
298284

285+
/// \brief Create a new Snapshot instance with validation on the inputs.
286+
static Result<std::unique_ptr<Snapshot>> Make(
287+
int64_t sequence_number, int64_t snapshot_id,
288+
std::optional<int64_t> parent_snapshot_id, TimePointMs timestamp_ms,
289+
std::string operation, std::unordered_map<std::string, std::string> summary,
290+
std::optional<int32_t> schema_id, std::string manifest_list,
291+
std::optional<int64_t> first_row_id = std::nullopt,
292+
std::optional<int64_t> added_rows = std::nullopt);
293+
299294
/// \brief Return the name of the DataOperations data operation that produced this
300295
/// snapshot.
301296
///
302297
/// \return the operation that produced this snapshot, or nullopt if the operation is
303298
/// unknown.
304-
std::optional<std::string_view> operation() const;
299+
std::optional<std::string_view> Operation() const;
305300

306301
/// \brief The row-id of the first newly added row in this snapshot.
307302
///

0 commit comments

Comments
 (0)