Skip to content

Commit 349eb92

Browse files
authored
refactor: optimize SnapshotUtil ancestor methods with early return (#560)
1 parent f955a55 commit 349eb92

File tree

2 files changed

+171
-68
lines changed

2 files changed

+171
-68
lines changed

src/iceberg/util/snapshot_util.cc

Lines changed: 92 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,13 @@ namespace iceberg {
4141

4242
Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::AncestorsOf(
4343
const Table& table, int64_t snapshot_id) {
44-
return table.SnapshotById(snapshot_id).and_then([&table](const auto& snapshot) {
45-
return AncestorsOf(table, snapshot);
46-
});
44+
return AncestorsOf(*table.metadata(), snapshot_id);
45+
}
46+
47+
Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::AncestorsOf(
48+
const TableMetadata& metadata, int64_t snapshot_id) {
49+
return AncestorsOf(snapshot_id,
50+
[&metadata](int64_t id) { return metadata.SnapshotById(id); });
4751
}
4852

4953
Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::AncestorsOf(
@@ -54,72 +58,96 @@ Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::AncestorsOf(
5458
});
5559
}
5660

57-
Result<bool> SnapshotUtil::IsAncestorOf(const Table& table, int64_t snapshot_id,
61+
Result<bool> SnapshotUtil::IsAncestorOf(const Table& table,
5862
int64_t ancestor_snapshot_id) {
59-
ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(table, snapshot_id));
60-
return std::ranges::any_of(ancestors, [ancestor_snapshot_id](const auto& snapshot) {
61-
return snapshot != nullptr && snapshot->snapshot_id == ancestor_snapshot_id;
62-
});
63+
return IsAncestorOf(*table.metadata(), ancestor_snapshot_id);
6364
}
6465

65-
Result<bool> SnapshotUtil::IsAncestorOf(
66-
int64_t snapshot_id, int64_t ancestor_snapshot_id,
67-
const std::function<Result<std::shared_ptr<Snapshot>>(int64_t)>& lookup) {
68-
ICEBERG_ASSIGN_OR_RAISE(auto snapshot, lookup(snapshot_id));
69-
ICEBERG_CHECK(snapshot != nullptr, "Cannot find snapshot: {}", snapshot_id);
70-
ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(snapshot, lookup));
71-
return std::ranges::any_of(ancestors, [ancestor_snapshot_id](const auto& ancestor) {
72-
return ancestor != nullptr && ancestor->snapshot_id == ancestor_snapshot_id;
73-
});
66+
Result<bool> SnapshotUtil::IsAncestorOf(const TableMetadata& metadata,
67+
int64_t ancestor_snapshot_id) {
68+
ICEBERG_ASSIGN_OR_RAISE(auto current, metadata.Snapshot());
69+
ICEBERG_CHECK(current != nullptr, "Current snapshot is null");
70+
return IsAncestorOf(metadata, current->snapshot_id, ancestor_snapshot_id);
7471
}
7572

76-
Result<bool> SnapshotUtil::IsAncestorOf(const Table& table,
73+
Result<bool> SnapshotUtil::IsAncestorOf(const Table& table, int64_t snapshot_id,
7774
int64_t ancestor_snapshot_id) {
78-
ICEBERG_ASSIGN_OR_RAISE(auto current, table.current_snapshot());
79-
ICEBERG_CHECK(current != nullptr, "Current snapshot is null");
80-
return IsAncestorOf(table, current->snapshot_id, ancestor_snapshot_id);
75+
return IsAncestorOf(*table.metadata(), snapshot_id, ancestor_snapshot_id);
8176
}
8277

8378
Result<bool> SnapshotUtil::IsAncestorOf(const TableMetadata& metadata,
79+
int64_t snapshot_id,
8480
int64_t ancestor_snapshot_id) {
85-
ICEBERG_ASSIGN_OR_RAISE(auto current, metadata.Snapshot());
86-
ICEBERG_CHECK(current != nullptr, "Current snapshot is null");
81+
return IsAncestorOf(snapshot_id, ancestor_snapshot_id,
82+
[&metadata](int64_t id) { return metadata.SnapshotById(id); });
83+
}
8784

88-
// Create a lookup function that uses the metadata
89-
auto lookup = [&metadata](int64_t id) -> Result<std::shared_ptr<Snapshot>> {
90-
return metadata.SnapshotById(id);
91-
};
85+
Result<bool> SnapshotUtil::IsAncestorOf(
86+
int64_t snapshot_id, int64_t ancestor_snapshot_id,
87+
const std::function<Result<std::shared_ptr<Snapshot>>(int64_t)>& lookup) {
88+
ICEBERG_ASSIGN_OR_RAISE(auto current, lookup(snapshot_id));
89+
ICEBERG_CHECK(current != nullptr, "Cannot find snapshot: {}", snapshot_id);
9290

93-
ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(current->snapshot_id, lookup));
94-
return std::ranges::any_of(ancestors, [ancestor_snapshot_id](const auto& snapshot) {
95-
return snapshot != nullptr && snapshot->snapshot_id == ancestor_snapshot_id;
96-
});
91+
if (snapshot_id == ancestor_snapshot_id) {
92+
return true;
93+
}
94+
95+
while (current != nullptr && current->parent_snapshot_id.has_value()) {
96+
int64_t parent_id = current->parent_snapshot_id.value();
97+
auto parent_result = lookup(parent_id);
98+
ICEBERG_ACTION_FOR_NOT_FOUND(parent_result, { break; });
99+
if (parent_id == ancestor_snapshot_id) {
100+
return true;
101+
}
102+
current = std::move(parent_result.value());
103+
}
104+
105+
return false;
97106
}
98107

99108
Result<bool> SnapshotUtil::IsParentAncestorOf(const Table& table, int64_t snapshot_id,
100109
int64_t ancestor_parent_snapshot_id) {
101-
ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(table, snapshot_id));
102-
return std::ranges::any_of(
103-
ancestors, [ancestor_parent_snapshot_id](const auto& snapshot) {
104-
return snapshot != nullptr && snapshot->parent_snapshot_id.has_value() &&
105-
snapshot->parent_snapshot_id.value() == ancestor_parent_snapshot_id;
106-
});
110+
return IsParentAncestorOf(*table.metadata(), snapshot_id, ancestor_parent_snapshot_id);
111+
}
112+
113+
Result<bool> SnapshotUtil::IsParentAncestorOf(const TableMetadata& metadata,
114+
int64_t snapshot_id,
115+
int64_t ancestor_parent_snapshot_id) {
116+
return IsParentAncestorOf(
117+
snapshot_id, ancestor_parent_snapshot_id,
118+
[&metadata](int64_t id) { return metadata.SnapshotById(id); });
119+
}
120+
121+
Result<bool> SnapshotUtil::IsParentAncestorOf(
122+
int64_t snapshot_id, int64_t ancestor_parent_snapshot_id,
123+
const std::function<Result<std::shared_ptr<Snapshot>>(int64_t)>& lookup) {
124+
ICEBERG_ASSIGN_OR_RAISE(auto current, lookup(snapshot_id));
125+
ICEBERG_CHECK(current != nullptr, "Cannot find snapshot: {}", snapshot_id);
126+
127+
while (current != nullptr) {
128+
if (!current->parent_snapshot_id.has_value()) {
129+
break;
130+
}
131+
if (current->parent_snapshot_id.value() == ancestor_parent_snapshot_id) {
132+
return true;
133+
}
134+
auto parent_result = lookup(current->parent_snapshot_id.value());
135+
ICEBERG_ACTION_FOR_NOT_FOUND(parent_result, { break; });
136+
current = std::move(parent_result.value());
137+
}
138+
139+
return false;
107140
}
108141

109142
Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::CurrentAncestors(
110143
const Table& table) {
111-
ICEBERG_ASSIGN_OR_RAISE(auto current, table.current_snapshot());
112-
return AncestorsOf(table, current);
144+
return CurrentAncestors(*table.metadata());
113145
}
114146

115147
Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::CurrentAncestors(
116148
const TableMetadata& metadata) {
117149
ICEBERG_ASSIGN_OR_RAISE(auto current, metadata.Snapshot());
118-
auto lookup = [&metadata](int64_t id) -> Result<std::shared_ptr<Snapshot>> {
119-
return metadata.SnapshotById(id);
120-
};
121-
122-
return AncestorsOf(current, lookup);
150+
return AncestorsOf(metadata, current);
123151
}
124152

125153
Result<std::vector<int64_t>> SnapshotUtil::CurrentAncestorIds(const Table& table) {
@@ -137,7 +165,12 @@ Result<std::optional<std::shared_ptr<Snapshot>>> SnapshotUtil::OldestAncestor(
137165

138166
Result<std::optional<std::shared_ptr<Snapshot>>> SnapshotUtil::OldestAncestorOf(
139167
const Table& table, int64_t snapshot_id) {
140-
ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(table, snapshot_id));
168+
return OldestAncestorOf(*table.metadata(), snapshot_id);
169+
}
170+
171+
Result<std::optional<std::shared_ptr<Snapshot>>> SnapshotUtil::OldestAncestorOf(
172+
const TableMetadata& metadata, int64_t snapshot_id) {
173+
ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(metadata, snapshot_id));
141174
if (ancestors.empty()) {
142175
return std::nullopt;
143176
}
@@ -151,7 +184,7 @@ Result<std::optional<std::shared_ptr<Snapshot>>> SnapshotUtil::OldestAncestorAft
151184
auto current = std::move(current_result.value());
152185

153186
std::optional<std::shared_ptr<Snapshot>> last_snapshot = std::nullopt;
154-
ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(table, current));
187+
ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(*table.metadata(), current));
155188
for (const auto& snapshot : ancestors) {
156189
auto snapshot_timestamp_ms = snapshot->timestamp_ms;
157190
if (snapshot_timestamp_ms < timestamp_ms) {
@@ -200,29 +233,36 @@ Result<std::vector<int64_t>> SnapshotUtil::AncestorIdsBetween(
200233
Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::AncestorsBetween(
201234
const Table& table, int64_t latest_snapshot_id,
202235
std::optional<int64_t> oldest_snapshot_id) {
203-
ICEBERG_ASSIGN_OR_RAISE(auto start, table.SnapshotById(latest_snapshot_id));
236+
return AncestorsBetween(*table.metadata(), latest_snapshot_id, oldest_snapshot_id);
237+
}
238+
239+
Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::AncestorsBetween(
240+
const TableMetadata& metadata, int64_t latest_snapshot_id,
241+
std::optional<int64_t> oldest_snapshot_id) {
242+
ICEBERG_ASSIGN_OR_RAISE(auto start, metadata.SnapshotById(latest_snapshot_id));
204243

205244
if (oldest_snapshot_id.has_value()) {
206245
if (latest_snapshot_id == oldest_snapshot_id.value()) {
207246
return {};
208247
}
209248

210249
return AncestorsOf(start,
211-
[&table, oldest_snapshot_id = oldest_snapshot_id.value()](
250+
[&metadata, oldest_snapshot_id = oldest_snapshot_id.value()](
212251
int64_t id) -> Result<std::shared_ptr<Snapshot>> {
213252
if (id == oldest_snapshot_id) {
214253
return nullptr;
215254
}
216-
return table.SnapshotById(id);
255+
return metadata.SnapshotById(id);
217256
});
218257
} else {
219-
return AncestorsOf(table, start);
258+
return AncestorsOf(metadata, start);
220259
}
221260
}
222261

223262
Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::AncestorsOf(
224-
const Table& table, const std::shared_ptr<Snapshot>& snapshot) {
225-
return AncestorsOf(snapshot, [&table](int64_t id) { return table.SnapshotById(id); });
263+
const TableMetadata& metadata, const std::shared_ptr<Snapshot>& snapshot) {
264+
return AncestorsOf(snapshot,
265+
[&metadata](int64_t id) { return metadata.SnapshotById(id); });
226266
}
227267

228268
Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::AncestorsOf(

src/iceberg/util/snapshot_util_internal.h

Lines changed: 79 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,14 @@ class ICEBERG_EXPORT SnapshotUtil {
4444
static Result<std::vector<std::shared_ptr<Snapshot>>> AncestorsOf(const Table& table,
4545
int64_t snapshot_id);
4646

47+
/// \brief Returns a vector of ancestors of the given snapshot.
48+
///
49+
/// \param metadata The table metadata
50+
/// \param snapshot_id The snapshot ID to start from
51+
/// \return A vector of ancestor snapshots
52+
static Result<std::vector<std::shared_ptr<Snapshot>>> AncestorsOf(
53+
const TableMetadata& metadata, int64_t snapshot_id);
54+
4755
/// \brief Returns a vector of ancestors of the given snapshot.
4856
///
4957
/// \param snapshot_id The snapshot ID to start from
@@ -53,6 +61,23 @@ class ICEBERG_EXPORT SnapshotUtil {
5361
int64_t snapshot_id,
5462
const std::function<Result<std::shared_ptr<Snapshot>>(int64_t)>& lookup);
5563

64+
/// \brief Returns whether ancestor_snapshot_id is an ancestor of the table's current
65+
/// state.
66+
///
67+
/// \param table The table to check
68+
/// \param ancestor_snapshot_id The ancestor snapshot ID to check for
69+
/// \return true if ancestor_snapshot_id is an ancestor of the current snapshot
70+
static Result<bool> IsAncestorOf(const Table& table, int64_t ancestor_snapshot_id);
71+
72+
/// \brief Returns whether ancestor_snapshot_id is an ancestor of the metadata's current
73+
/// state.
74+
///
75+
/// \param metadata The table metadata to check
76+
/// \param ancestor_snapshot_id The ancestor snapshot ID to check for
77+
/// \return true if ancestor_snapshot_id is an ancestor of the current snapshot
78+
static Result<bool> IsAncestorOf(const TableMetadata& metadata,
79+
int64_t ancestor_snapshot_id);
80+
5681
/// \brief Returns whether ancestor_snapshot_id is an ancestor of snapshot_id.
5782
///
5883
/// \param table The table to check
@@ -62,6 +87,15 @@ class ICEBERG_EXPORT SnapshotUtil {
6287
static Result<bool> IsAncestorOf(const Table& table, int64_t snapshot_id,
6388
int64_t ancestor_snapshot_id);
6489

90+
/// \brief Returns whether ancestor_snapshot_id is an ancestor of snapshot_id.
91+
///
92+
/// \param metadata The table metadata to check
93+
/// \param snapshot_id The snapshot ID to check
94+
/// \param ancestor_snapshot_id The ancestor snapshot ID to check for
95+
/// \return true if ancestor_snapshot_id is an ancestor of snapshot_id
96+
static Result<bool> IsAncestorOf(const TableMetadata& metadata, int64_t snapshot_id,
97+
int64_t ancestor_snapshot_id);
98+
6599
/// \brief Returns whether ancestor_snapshot_id is an ancestor of snapshot_id using the
66100
/// given lookup function.
67101
///
@@ -73,32 +107,37 @@ class ICEBERG_EXPORT SnapshotUtil {
73107
int64_t snapshot_id, int64_t ancestor_snapshot_id,
74108
const std::function<Result<std::shared_ptr<Snapshot>>(int64_t)>& lookup);
75109

76-
/// \brief Returns whether ancestor_snapshot_id is an ancestor of the table's current
77-
/// state.
110+
/// \brief Returns whether some ancestor of snapshot_id has parentId matches
111+
/// ancestor_parent_snapshot_id.
78112
///
79113
/// \param table The table to check
80-
/// \param ancestor_snapshot_id The ancestor snapshot ID to check for
81-
/// \return true if ancestor_snapshot_id is an ancestor of the current snapshot
82-
static Result<bool> IsAncestorOf(const Table& table, int64_t ancestor_snapshot_id);
114+
/// \param snapshot_id The snapshot ID to check
115+
/// \param ancestor_parent_snapshot_id The ancestor parent snapshot ID to check for
116+
/// \return true if any ancestor has the given parent ID
117+
static Result<bool> IsParentAncestorOf(const Table& table, int64_t snapshot_id,
118+
int64_t ancestor_parent_snapshot_id);
83119

84-
/// \brief Returns whether ancestor_snapshot_id is an ancestor of the metadata's current
85-
/// state.
120+
/// \brief Returns whether some ancestor of snapshot_id has parentId matches
121+
/// ancestor_parent_snapshot_id.
86122
///
87123
/// \param metadata The table metadata to check
88-
/// \param ancestor_snapshot_id The ancestor snapshot ID to check for
89-
/// \return true if ancestor_snapshot_id is an ancestor of the current snapshot
90-
static Result<bool> IsAncestorOf(const TableMetadata& metadata,
91-
int64_t ancestor_snapshot_id);
124+
/// \param snapshot_id The snapshot ID to check
125+
/// \param ancestor_parent_snapshot_id The ancestor parent snapshot ID to check for
126+
/// \return true if any ancestor has the given parent ID
127+
static Result<bool> IsParentAncestorOf(const TableMetadata& metadata,
128+
int64_t snapshot_id,
129+
int64_t ancestor_parent_snapshot_id);
92130

93131
/// \brief Returns whether some ancestor of snapshot_id has parentId matches
94132
/// ancestor_parent_snapshot_id.
95133
///
96-
/// \param table The table to check
97134
/// \param snapshot_id The snapshot ID to check
98135
/// \param ancestor_parent_snapshot_id The ancestor parent snapshot ID to check for
136+
/// \param lookup Function to lookup snapshots by ID
99137
/// \return true if any ancestor has the given parent ID
100-
static Result<bool> IsParentAncestorOf(const Table& table, int64_t snapshot_id,
101-
int64_t ancestor_parent_snapshot_id);
138+
static Result<bool> IsParentAncestorOf(
139+
int64_t snapshot_id, int64_t ancestor_parent_snapshot_id,
140+
const std::function<Result<std::shared_ptr<Snapshot>>(int64_t)>& lookup);
102141

103142
/// \brief Returns a vector that traverses the table's snapshots from the current to the
104143
/// last known ancestor.
@@ -147,6 +186,19 @@ class ICEBERG_EXPORT SnapshotUtil {
147186
static Result<std::optional<std::shared_ptr<Snapshot>>> OldestAncestorOf(
148187
const Table& table, int64_t snapshot_id);
149188

189+
/// \brief Traverses the history and finds the oldest ancestor of the specified
190+
/// snapshot.
191+
///
192+
/// Oldest ancestor is defined as the ancestor snapshot whose parent is null or has been
193+
/// expired. If the specified snapshot has no parent or parent has been expired, the
194+
/// specified snapshot itself is returned.
195+
///
196+
/// \param metadata The table metadata
197+
/// \param snapshot_id The ID of the snapshot to find the oldest ancestor
198+
/// \return The oldest snapshot, or nullopt if not found
199+
static Result<std::optional<std::shared_ptr<Snapshot>>> OldestAncestorOf(
200+
const TableMetadata& metadata, int64_t snapshot_id);
201+
150202
/// \brief Traverses the history of the table's current snapshot, finds the oldest
151203
/// snapshot that was committed either at or after a given time.
152204
///
@@ -192,6 +244,17 @@ class ICEBERG_EXPORT SnapshotUtil {
192244
const Table& table, int64_t latest_snapshot_id,
193245
std::optional<int64_t> oldest_snapshot_id);
194246

247+
/// \brief Returns a vector of ancestors between two snapshots.
248+
///
249+
/// \param metadata The table metadata
250+
/// \param latest_snapshot_id The latest snapshot ID
251+
/// \param oldest_snapshot_id The oldest snapshot ID (optional, nullopt means all
252+
/// ancestors)
253+
/// \return A vector of ancestor snapshots between the two snapshots
254+
static Result<std::vector<std::shared_ptr<Snapshot>>> AncestorsBetween(
255+
const TableMetadata& metadata, int64_t latest_snapshot_id,
256+
std::optional<int64_t> oldest_snapshot_id);
257+
195258
/// \brief Traverses the history of the table's current snapshot and finds the snapshot
196259
/// with the given snapshot id as its parent.
197260
///
@@ -317,11 +380,11 @@ class ICEBERG_EXPORT SnapshotUtil {
317380
private:
318381
/// \brief Helper function to traverse ancestors of a snapshot.
319382
///
320-
/// \param table The table
383+
/// \param metadata The table metadata
321384
/// \param snapshot The snapshot to start from
322385
/// \return A vector of ancestor snapshots
323386
static Result<std::vector<std::shared_ptr<Snapshot>>> AncestorsOf(
324-
const Table& table, const std::shared_ptr<Snapshot>& snapshot);
387+
const TableMetadata& metadata, const std::shared_ptr<Snapshot>& snapshot);
325388

326389
/// \brief Helper function to traverse ancestors of a snapshot using a lookup function.
327390
///

0 commit comments

Comments
 (0)