Skip to content

Commit 07c45c3

Browse files
committed
fix review
1 parent 6e852a0 commit 07c45c3

File tree

9 files changed

+107
-35
lines changed

9 files changed

+107
-35
lines changed

src/iceberg/json_internal.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1593,6 +1593,17 @@ Result<std::unique_ptr<TableUpdate>> TableUpdateFromJson(const nlohmann::json& j
15931593
ICEBERG_ASSIGN_OR_RAISE(auto location, GetJsonValue<std::string>(json, kLocation));
15941594
return std::make_unique<table::SetLocation>(std::move(location));
15951595
}
1596+
if (action == kActionSetStatistics) {
1597+
ICEBERG_ASSIGN_OR_RAISE(auto statistics_json,
1598+
GetJsonValue<nlohmann::json>(json, kStatistics));
1599+
ICEBERG_ASSIGN_OR_RAISE(auto statistics_file,
1600+
StatisticsFileFromJson(statistics_json));
1601+
return std::make_unique<table::SetStatistics>(std::move(statistics_file));
1602+
}
1603+
if (action == kActionRemoveStatistics) {
1604+
ICEBERG_ASSIGN_OR_RAISE(auto snapshot_id, GetJsonValue<int64_t>(json, kSnapshotId));
1605+
return std::make_unique<table::RemoveStatistics>(snapshot_id);
1606+
}
15961607

15971608
return JsonParseError("Unknown table update action: {}", action);
15981609
}

src/iceberg/table_metadata.cc

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,7 @@ class TableMetadataBuilder::Impl {
613613
Status SetCurrentSchema(int32_t schema_id);
614614
Status RemoveSchemas(const std::unordered_set<int32_t>& schema_ids);
615615
Result<int32_t> AddSchema(const Schema& schema, int32_t new_last_column_id);
616-
Status SetLocation(std::string_view location);
616+
void SetLocation(std::string_view location);
617617
Status AddSnapshot(std::shared_ptr<Snapshot> snapshot);
618618
Status SetBranchSnapshot(int64_t snapshot_id, const std::string& branch);
619619
Status SetBranchSnapshot(std::shared_ptr<Snapshot> snapshot, const std::string& branch);
@@ -1035,13 +1035,13 @@ Result<int32_t> TableMetadataBuilder::Impl::AddSchema(const Schema& schema,
10351035
return new_schema_id;
10361036
}
10371037

1038-
Status TableMetadataBuilder::Impl::SetLocation(std::string_view location) {
1038+
void TableMetadataBuilder::Impl::SetLocation(std::string_view location) {
10391039
if (location == metadata_.location) {
1040-
return {};
1040+
return;
10411041
}
10421042
metadata_.location = std::string(location);
10431043
changes_.push_back(std::make_unique<table::SetLocation>(std::string(location)));
1044-
return {};
1044+
return;
10451045
}
10461046

10471047
Status TableMetadataBuilder::Impl::AddSnapshot(std::shared_ptr<Snapshot> snapshot) {
@@ -1179,7 +1179,7 @@ Status TableMetadataBuilder::Impl::SetRef(const std::string& name,
11791179

11801180
Status TableMetadataBuilder::Impl::SetStatistics(
11811181
const std::shared_ptr<StatisticsFile>& statistics_file) {
1182-
ICEBERG_CHECK(statistics_file != nullptr, "Cannot set null statistics file");
1182+
ICEBERG_PRECHECK(statistics_file != nullptr, "Cannot set null statistics file");
11831183

11841184
// Find and replace existing statistics for the same snapshot_id, or add new one
11851185
auto it = std::ranges::find_if(
@@ -1194,24 +1194,19 @@ Status TableMetadataBuilder::Impl::SetStatistics(
11941194
metadata_.statistics.push_back(statistics_file);
11951195
}
11961196

1197-
changes_.push_back(std::make_unique<table::SetStatistics>(statistics_file));
1197+
changes_.push_back(std::make_unique<table::SetStatistics>(std::move(statistics_file)));
11981198
return {};
11991199
}
12001200

12011201
Status TableMetadataBuilder::Impl::RemoveStatistics(int64_t snapshot_id) {
1202-
auto it = std::ranges::find_if(metadata_.statistics, [snapshot_id](const auto& stat) {
1203-
return stat && stat->snapshot_id == snapshot_id;
1204-
});
1205-
1206-
if (it == metadata_.statistics.end()) {
1202+
auto removed_count =
1203+
std::erase_if(metadata_.statistics, [snapshot_id](const auto& stat) {
1204+
return stat && stat->snapshot_id == snapshot_id;
1205+
});
1206+
if (removed_count == 0) {
12071207
return {};
12081208
}
12091209

1210-
// Remove statistics for the given snapshot_id
1211-
std::erase_if(metadata_.statistics, [snapshot_id](const auto& stat) {
1212-
return stat && stat->snapshot_id == snapshot_id;
1213-
});
1214-
12151210
changes_.push_back(std::make_unique<table::RemoveStatistics>(snapshot_id));
12161211
return {};
12171212
}
@@ -1632,7 +1627,7 @@ TableMetadataBuilder& TableMetadataBuilder::SuppressHistoricalSnapshots() {
16321627
}
16331628

16341629
TableMetadataBuilder& TableMetadataBuilder::SetStatistics(
1635-
const std::shared_ptr<StatisticsFile>& statistics_file) {
1630+
std::shared_ptr<StatisticsFile> statistics_file) {
16361631
ICEBERG_BUILDER_RETURN_IF_ERROR(impl_->SetStatistics(statistics_file));
16371632
return *this;
16381633
}
@@ -1665,7 +1660,7 @@ TableMetadataBuilder& TableMetadataBuilder::RemoveProperties(
16651660
}
16661661

16671662
TableMetadataBuilder& TableMetadataBuilder::SetLocation(std::string_view location) {
1668-
ICEBERG_BUILDER_RETURN_IF_ERROR(impl_->SetLocation(location));
1663+
impl_->SetLocation(location);
16691664
return *this;
16701665
}
16711666

src/iceberg/table_metadata.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -399,8 +399,7 @@ class ICEBERG_EXPORT TableMetadataBuilder : public ErrorCollector {
399399
///
400400
/// \param statistics_file The statistics file to set
401401
/// \return Reference to this builder for method chaining
402-
TableMetadataBuilder& SetStatistics(
403-
const std::shared_ptr<StatisticsFile>& statistics_file);
402+
TableMetadataBuilder& SetStatistics(std::shared_ptr<StatisticsFile> statistics_file);
404403

405404
/// \brief Remove table statistics by snapshot ID
406405
///

src/iceberg/table_update.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,13 @@ bool SetStatistics::Equals(const TableUpdate& other) const {
464464
return false;
465465
}
466466
const auto& other_set = static_cast<const SetStatistics&>(other);
467-
return *statistics_file_ == *other_set.statistics_file_;
467+
if (!statistics_file_ != !other_set.statistics_file_) {
468+
return false;
469+
}
470+
if (statistics_file_ && !(*statistics_file_ == *other_set.statistics_file_)) {
471+
return false;
472+
}
473+
return true;
468474
}
469475

470476
std::unique_ptr<TableUpdate> SetStatistics::Clone() const {

src/iceberg/test/json_internal_test.cc

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "iceberg/snapshot.h"
3232
#include "iceberg/sort_field.h"
3333
#include "iceberg/sort_order.h"
34+
#include "iceberg/statistics_file.h"
3435
#include "iceberg/table_requirement.h"
3536
#include "iceberg/table_update.h"
3637
#include "iceberg/test/matchers.h"
@@ -508,6 +509,54 @@ TEST(JsonInternalTest, TableUpdateSetLocation) {
508509
EXPECT_EQ(*internal::checked_cast<table::SetLocation*>(parsed.value().get()), update);
509510
}
510511

512+
TEST(JsonInternalTest, TableUpdateSetStatistics) {
513+
auto stats_file = std::make_shared<StatisticsFile>();
514+
stats_file->snapshot_id = 123456789;
515+
stats_file->path = "s3://bucket/warehouse/table/metadata/stats-123456789.puffin";
516+
stats_file->file_size_in_bytes = 1024;
517+
stats_file->file_footer_size_in_bytes = 128;
518+
stats_file->blob_metadata = {BlobMetadata{.type = "ndv",
519+
.source_snapshot_id = 123456789,
520+
.source_snapshot_sequence_number = 1,
521+
.fields = {1, 2},
522+
.properties = {{"prop1", "value1"}}}};
523+
524+
table::SetStatistics update(stats_file);
525+
nlohmann::json expected = R"({
526+
"action": "set-statistics",
527+
"statistics": {
528+
"snapshot-id": 123456789,
529+
"statistics-path": "s3://bucket/warehouse/table/metadata/stats-123456789.puffin",
530+
"file-size-in-bytes": 1024,
531+
"file-footer-size-in-bytes": 128,
532+
"blob-metadata": [{
533+
"type": "ndv",
534+
"snapshot-id": 123456789,
535+
"sequence-number": 1,
536+
"fields": [1, 2],
537+
"properties": {"prop1": "value1"}
538+
}]
539+
}
540+
})"_json;
541+
542+
EXPECT_EQ(ToJson(update), expected);
543+
auto parsed = TableUpdateFromJson(expected);
544+
ASSERT_THAT(parsed, IsOk());
545+
EXPECT_EQ(*internal::checked_cast<table::SetStatistics*>(parsed.value().get()), update);
546+
}
547+
548+
TEST(JsonInternalTest, TableUpdateRemoveStatistics) {
549+
table::RemoveStatistics update(123456789);
550+
nlohmann::json expected =
551+
R"({"action":"remove-statistics","snapshot-id":123456789})"_json;
552+
553+
EXPECT_EQ(ToJson(update), expected);
554+
auto parsed = TableUpdateFromJson(expected);
555+
ASSERT_THAT(parsed, IsOk());
556+
EXPECT_EQ(*internal::checked_cast<table::RemoveStatistics*>(parsed.value().get()),
557+
update);
558+
}
559+
511560
TEST(JsonInternalTest, TableUpdateUnknownAction) {
512561
nlohmann::json json = R"({"action":"unknown-action"})"_json;
513562
auto result = TableUpdateFromJson(json);

src/iceberg/test/update_statistics_test.cc

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include "iceberg/update/update_statistics.h"
2121

22+
#include <algorithm>
2223
#include <memory>
2324

2425
#include <gmock/gmock.h>
@@ -54,6 +55,16 @@ class UpdateStatisticsTest : public UpdateTestBase {
5455

5556
return stats_file;
5657
}
58+
59+
// Helper to find statistics file by snapshot_id in the result vector
60+
std::shared_ptr<StatisticsFile> FindStatistics(
61+
const std::vector<std::pair<int64_t, std::shared_ptr<StatisticsFile>>>& to_set,
62+
int64_t snapshot_id) {
63+
auto it = std::find_if(to_set.begin(), to_set.end(), [snapshot_id](const auto& p) {
64+
return p.first == snapshot_id;
65+
});
66+
return it != to_set.end() ? it->second : nullptr;
67+
}
5768
};
5869

5970
TEST_F(UpdateStatisticsTest, EmptyUpdate) {
@@ -72,7 +83,7 @@ TEST_F(UpdateStatisticsTest, SetStatistics) {
7283
ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply());
7384
EXPECT_EQ(result.to_set.size(), 1);
7485
EXPECT_TRUE(result.to_remove.empty());
75-
EXPECT_EQ(result.to_set.at(1), stats_file);
86+
EXPECT_EQ(FindStatistics(result.to_set, 1), stats_file);
7687
}
7788

7889
TEST_F(UpdateStatisticsTest, SetMultipleStatistics) {
@@ -87,8 +98,8 @@ TEST_F(UpdateStatisticsTest, SetMultipleStatistics) {
8798
ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply());
8899
EXPECT_EQ(result.to_set.size(), 2);
89100
EXPECT_TRUE(result.to_remove.empty());
90-
EXPECT_EQ(result.to_set.at(1), stats_file_1);
91-
EXPECT_EQ(result.to_set.at(2), stats_file_2);
101+
EXPECT_EQ(FindStatistics(result.to_set, 1), stats_file_1);
102+
EXPECT_EQ(FindStatistics(result.to_set, 2), stats_file_2);
92103
}
93104

94105
TEST_F(UpdateStatisticsTest, RemoveStatistics) {
@@ -120,7 +131,7 @@ TEST_F(UpdateStatisticsTest, SetAndRemoveDifferentSnapshots) {
120131

121132
ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply());
122133
EXPECT_EQ(result.to_set.size(), 1);
123-
EXPECT_EQ(result.to_set.at(1), stats_file);
134+
EXPECT_EQ(FindStatistics(result.to_set, 1), stats_file);
124135
EXPECT_EQ(result.to_remove.size(), 1);
125136
EXPECT_THAT(result.to_remove, ::testing::Contains(2));
126137
}
@@ -139,8 +150,8 @@ TEST_F(UpdateStatisticsTest, ReplaceStatistics) {
139150
EXPECT_EQ(result.to_set.size(), 1);
140151
EXPECT_TRUE(result.to_remove.empty());
141152
// Should have the second one (replacement)
142-
EXPECT_EQ(result.to_set.at(1), stats_file_2);
143-
EXPECT_NE(result.to_set.at(1), stats_file_1);
153+
EXPECT_EQ(FindStatistics(result.to_set, 1), stats_file_2);
154+
EXPECT_NE(FindStatistics(result.to_set, 1), stats_file_1);
144155
}
145156

146157
TEST_F(UpdateStatisticsTest, SetThenRemoveSameSnapshot) {
@@ -168,7 +179,7 @@ TEST_F(UpdateStatisticsTest, RemoveThenSetSameSnapshot) {
168179
ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply());
169180
EXPECT_EQ(result.to_set.size(), 1);
170181
EXPECT_TRUE(result.to_remove.empty());
171-
EXPECT_EQ(result.to_set.at(1), stats_file);
182+
EXPECT_EQ(FindStatistics(result.to_set, 1), stats_file);
172183
}
173184

174185
TEST_F(UpdateStatisticsTest, SetNullStatistics) {

src/iceberg/transaction.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,8 +195,8 @@ Status Transaction::Apply(PendingUpdate& update) {
195195
auto& update_statistics = internal::checked_cast<UpdateStatistics&>(update);
196196
ICEBERG_ASSIGN_OR_RAISE(auto result, update_statistics.Apply());
197197
// Apply statistics changes to the metadata builder
198-
for (const auto& [snapshot_id, stat_file] : result.to_set) {
199-
metadata_builder_->SetStatistics(stat_file);
198+
for (auto&& [_, stat_file] : result.to_set) {
199+
metadata_builder_->SetStatistics(std::move(stat_file));
200200
}
201201
for (const auto& snapshot_id : result.to_remove) {
202202
metadata_builder_->RemoveStatistics(snapshot_id);

src/iceberg/update/update_statistics.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@ UpdateStatistics::UpdateStatistics(std::shared_ptr<Transaction> transaction)
4444
UpdateStatistics::~UpdateStatistics() = default;
4545

4646
UpdateStatistics& UpdateStatistics::SetStatistics(
47-
const std::shared_ptr<StatisticsFile>& statistics_file) {
47+
std::shared_ptr<StatisticsFile> statistics_file) {
4848
ICEBERG_BUILDER_CHECK(statistics_file != nullptr, "Statistics file cannot be null");
49-
statistics_to_set_[statistics_file->snapshot_id] = statistics_file;
49+
statistics_to_set_[statistics_file->snapshot_id] = std::move(statistics_file);
5050
return *this;
5151
}
5252

@@ -61,7 +61,7 @@ Result<UpdateStatistics::ApplyResult> UpdateStatistics::Apply() {
6161
ApplyResult result;
6262
for (const auto& [snapshot_id, stats] : statistics_to_set_) {
6363
if (stats) {
64-
result.to_set[snapshot_id] = stats;
64+
result.to_set.emplace_back(snapshot_id, stats);
6565
} else {
6666
result.to_remove.push_back(snapshot_id);
6767
}

src/iceberg/update/update_statistics.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <cstdint>
2323
#include <memory>
2424
#include <unordered_map>
25+
#include <utility>
2526
#include <vector>
2627

2728
#include "iceberg/iceberg_export.h"
@@ -49,7 +50,7 @@ class ICEBERG_EXPORT UpdateStatistics : public PendingUpdate {
4950
///
5051
/// \param statistics_file The statistics file to set
5152
/// \return Reference to this UpdateStatistics for chaining
52-
UpdateStatistics& SetStatistics(const std::shared_ptr<StatisticsFile>& statistics_file);
53+
UpdateStatistics& SetStatistics(std::shared_ptr<StatisticsFile> statistics_file);
5354

5455
/// \brief Remove statistics for a snapshot.
5556
///
@@ -62,7 +63,7 @@ class ICEBERG_EXPORT UpdateStatistics : public PendingUpdate {
6263
Kind kind() const final { return Kind::kUpdateStatistics; }
6364

6465
struct ApplyResult {
65-
std::unordered_map<int64_t, std::shared_ptr<StatisticsFile>> to_set;
66+
std::vector<std::pair<int64_t, std::shared_ptr<StatisticsFile>>> to_set;
6667
std::vector<int64_t> to_remove;
6768
};
6869

0 commit comments

Comments
 (0)