1919
2020#include " iceberg/snapshot.h"
2121
22+ #include < memory>
23+
2224#include " iceberg/file_io.h"
2325#include " iceberg/manifest/manifest_list.h"
2426#include " iceberg/manifest/manifest_reader.h"
2527#include " iceberg/util/macros.h"
28+ #include " iceberg/util/string_util.h"
2629
2730namespace iceberg {
2831
@@ -49,6 +52,55 @@ SnapshotRefType SnapshotRef::type() const noexcept {
4952 retention);
5053}
5154
55+ Status SnapshotRef::Validate () const {
56+ if (type () == SnapshotRefType::kBranch ) {
57+ const auto & branch = std::get<Branch>(this ->retention );
58+ ICEBERG_CHECK (!branch.min_snapshots_to_keep .has_value () ||
59+ branch.min_snapshots_to_keep .value () > 0 ,
60+ " Min snapshots to keep must be greater than 0" );
61+ ICEBERG_CHECK (
62+ !branch.max_snapshot_age_ms .has_value () || branch.max_snapshot_age_ms .value () > 0 ,
63+ " Max snapshot age must be greater than 0 ms" );
64+ ICEBERG_CHECK (!branch.max_ref_age_ms .has_value () || branch.max_ref_age_ms .value () > 0 ,
65+ " Max reference age must be greater than 0" );
66+ } else {
67+ const auto & tag = std::get<Tag>(this ->retention );
68+ ICEBERG_CHECK (!tag.max_ref_age_ms .has_value () || tag.max_ref_age_ms .value () > 0 ,
69+ " Max reference age must be greater than 0" );
70+ }
71+ return {};
72+ }
73+
74+ Result<std::unique_ptr<SnapshotRef>> SnapshotRef::MakeBranch (
75+ int64_t snapshot_id, std::optional<int32_t > min_snapshots_to_keep,
76+ std::optional<int64_t > max_snapshot_age_ms, std::optional<int64_t > max_ref_age_ms) {
77+ auto ref = std::make_unique<SnapshotRef>(
78+ SnapshotRef{.snapshot_id = snapshot_id,
79+ .retention = Branch{
80+ .min_snapshots_to_keep = min_snapshots_to_keep,
81+ .max_snapshot_age_ms = max_snapshot_age_ms,
82+ .max_ref_age_ms = max_ref_age_ms,
83+ }});
84+ ICEBERG_RETURN_UNEXPECTED (ref->Validate ());
85+ return ref;
86+ }
87+
88+ Result<std::unique_ptr<SnapshotRef>> SnapshotRef::MakeTag (
89+ int64_t snapshot_id, std::optional<int64_t > max_ref_age_ms) {
90+ auto ref = std::make_unique<SnapshotRef>(SnapshotRef{
91+ .snapshot_id = snapshot_id, .retention = Tag{.max_ref_age_ms = max_ref_age_ms}});
92+ ICEBERG_RETURN_UNEXPECTED (ref->Validate ());
93+ return ref;
94+ }
95+
96+ std::unique_ptr<SnapshotRef> SnapshotRef::Clone (
97+ std::optional<int64_t > new_snapshot_id) const {
98+ auto ref = std::make_unique<SnapshotRef>();
99+ ref->snapshot_id = new_snapshot_id.value_or (snapshot_id);
100+ ref->retention = retention;
101+ return ref;
102+ }
103+
52104bool SnapshotRef::Equals (const SnapshotRef& other) const {
53105 if (this == &other) {
54106 return true ;
@@ -67,14 +119,32 @@ bool SnapshotRef::Equals(const SnapshotRef& other) const {
67119 }
68120}
69121
70- std::optional<std::string_view> Snapshot::operation () const {
122+ std::optional<std::string_view> Snapshot::Operation () const {
71123 auto it = summary.find (SnapshotSummaryFields::kOperation );
72124 if (it != summary.end ()) {
73125 return it->second ;
74126 }
75127 return std::nullopt ;
76128}
77129
130+ Result<std::optional<int64_t >> Snapshot::FirstRowId () const {
131+ auto it = summary.find (SnapshotSummaryFields::kFirstRowId );
132+ if (it == summary.end ()) {
133+ return std::nullopt ;
134+ }
135+
136+ return StringUtils::ParseInt<int64_t >(it->second );
137+ }
138+
139+ Result<std::optional<int64_t >> Snapshot::AddedRows () const {
140+ auto it = summary.find (SnapshotSummaryFields::kAddedRows );
141+ if (it == summary.end ()) {
142+ return std::nullopt ;
143+ }
144+
145+ return StringUtils::ParseInt<int64_t >(it->second );
146+ }
147+
78148bool Snapshot::Equals (const Snapshot& other) const {
79149 if (this == &other) {
80150 return true ;
@@ -85,6 +155,37 @@ bool Snapshot::Equals(const Snapshot& other) const {
85155 schema_id == other.schema_id ;
86156}
87157
158+ Result<std::unique_ptr<Snapshot>> Snapshot::Make (
159+ int64_t sequence_number, int64_t snapshot_id,
160+ std::optional<int64_t > parent_snapshot_id, TimePointMs timestamp_ms,
161+ std::string operation, std::unordered_map<std::string, std::string> summary,
162+ std::optional<int32_t > schema_id, std::string manifest_list,
163+ std::optional<int64_t > first_row_id, std::optional<int64_t > added_rows) {
164+ ICEBERG_PRECHECK (!operation.empty (), " Operation cannot be empty" );
165+ ICEBERG_PRECHECK (!first_row_id.has_value () || first_row_id.value () >= 0 ,
166+ " Invalid first-row-id (cannot be negative): {}" , first_row_id.value ());
167+ ICEBERG_PRECHECK (!added_rows.has_value () || added_rows.value () >= 0 ,
168+ " Invalid added-rows (cannot be negative): {}" , added_rows.value ());
169+ ICEBERG_PRECHECK (!first_row_id.has_value () || added_rows.has_value (),
170+ " Missing added-rows when first-row-id is set" );
171+ summary[SnapshotSummaryFields::kOperation ] = operation;
172+ if (first_row_id.has_value ()) {
173+ summary[SnapshotSummaryFields::kFirstRowId ] = std::to_string (first_row_id.value ());
174+ }
175+ if (added_rows.has_value ()) {
176+ summary[SnapshotSummaryFields::kAddedRows ] = std::to_string (added_rows.value ());
177+ }
178+ return std::make_unique<Snapshot>(Snapshot{
179+ .snapshot_id = snapshot_id,
180+ .parent_snapshot_id = parent_snapshot_id,
181+ .sequence_number = sequence_number,
182+ .timestamp_ms = timestamp_ms,
183+ .manifest_list = std::move (manifest_list),
184+ .summary = std::move (summary),
185+ .schema_id = schema_id,
186+ });
187+ }
188+
88189Result<SnapshotCache::ManifestsCache> SnapshotCache::InitManifestsCache (
89190 const Snapshot* snapshot, std::shared_ptr<FileIO> file_io) {
90191 if (file_io == nullptr ) {
0 commit comments