Skip to content

Commit 873b1ce

Browse files
committed
feat: implement update stastics
1 parent 437f252 commit 873b1ce

17 files changed

+562
-6
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ set(ICEBERG_SOURCES
9292
update/update_properties.cc
9393
update/update_schema.cc
9494
update/update_sort_order.cc
95+
update/update_statistics.cc
9596
util/bucket_util.cc
9697
util/content_file_util.cc
9798
util/conversions.cc

src/iceberg/json_internal.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,8 @@ constexpr std::string_view kActionSetSnapshotRef = "set-snapshot-ref";
191191
constexpr std::string_view kActionSetProperties = "set-properties";
192192
constexpr std::string_view kActionRemoveProperties = "remove-properties";
193193
constexpr std::string_view kActionSetLocation = "set-location";
194+
constexpr std::string_view kActionSetStatistics = "set-statistics";
195+
constexpr std::string_view kActionRemoveStatistics = "remove-statistics";
194196

195197
// TableUpdate field constants
196198
constexpr std::string_view kUUID = "uuid";
@@ -1399,6 +1401,18 @@ nlohmann::json ToJson(const TableUpdate& update) {
13991401
json[kLocation] = u.location();
14001402
break;
14011403
}
1404+
case TableUpdate::Kind::kSetStatistics: {
1405+
const auto& u = internal::checked_cast<const table::SetStatistics&>(update);
1406+
json[kAction] = kActionSetStatistics;
1407+
json[kStatistics] = ToJson(*u.statistics_file());
1408+
break;
1409+
}
1410+
case TableUpdate::Kind::kRemoveStatistics: {
1411+
const auto& u = internal::checked_cast<const table::RemoveStatistics&>(update);
1412+
json[kAction] = kActionRemoveStatistics;
1413+
json[kSnapshotId] = u.snapshot_id();
1414+
break;
1415+
}
14021416
}
14031417
return json;
14041418
}

src/iceberg/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ iceberg_sources = files(
110110
'update/update_properties.cc',
111111
'update/update_schema.cc',
112112
'update/update_sort_order.cc',
113+
'update/update_statistics.cc',
113114
'util/bucket_util.cc',
114115
'util/content_file_util.cc',
115116
'util/conversions.cc',

src/iceberg/table.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "iceberg/update/update_partition_spec.h"
3636
#include "iceberg/update/update_properties.h"
3737
#include "iceberg/update/update_schema.h"
38+
#include "iceberg/update/update_statistics.h"
3839
#include "iceberg/util/macros.h"
3940

4041
namespace iceberg {
@@ -199,6 +200,13 @@ Result<std::shared_ptr<UpdateLocation>> Table::NewUpdateLocation() {
199200
return transaction->NewUpdateLocation();
200201
}
201202

203+
Result<std::shared_ptr<UpdateStatistics>> Table::NewUpdateStatistics() {
204+
ICEBERG_ASSIGN_OR_RAISE(
205+
auto transaction, Transaction::Make(shared_from_this(), Transaction::Kind::kUpdate,
206+
/*auto_commit=*/true));
207+
return transaction->NewUpdateStatistics();
208+
}
209+
202210
Result<std::shared_ptr<StagedTable>> StagedTable::Make(
203211
TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata,
204212
std::string metadata_location, std::shared_ptr<FileIO> io,

src/iceberg/table.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,10 @@ class ICEBERG_EXPORT Table : public std::enable_shared_from_this<Table> {
151151
/// \brief Create a new ExpireSnapshots to remove expired snapshots and commit the
152152
/// changes.
153153
virtual Result<std::shared_ptr<ExpireSnapshots>> NewExpireSnapshots();
154+
155+
/// \brief Create a new UpdateStatistics to update the table statistics and commit the
156+
/// changes.
157+
virtual Result<std::shared_ptr<UpdateStatistics>> NewUpdateStatistics();
154158

155159
/// \brief Create a new UpdateLocation to update the table location and commit the
156160
/// changes.

src/iceberg/table_metadata.cc

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -612,14 +612,16 @@ class TableMetadataBuilder::Impl {
612612
Status SetCurrentSchema(int32_t schema_id);
613613
Status RemoveSchemas(const std::unordered_set<int32_t>& schema_ids);
614614
Result<int32_t> AddSchema(const Schema& schema, int32_t new_last_column_id);
615-
void SetLocation(std::string_view location);
615+
Status SetLocation(std::string_view location);
616616
Status AddSnapshot(std::shared_ptr<Snapshot> snapshot);
617617
Status SetBranchSnapshot(int64_t snapshot_id, const std::string& branch);
618618
Status SetBranchSnapshot(std::shared_ptr<Snapshot> snapshot, const std::string& branch);
619619
Status SetRef(const std::string& name, std::shared_ptr<SnapshotRef> ref);
620620
Status RemoveRef(const std::string& name);
621621
Status RemoveSnapshots(const std::vector<int64_t>& snapshot_ids);
622622
Status RemovePartitionSpecs(const std::vector<int32_t>& spec_ids);
623+
Status SetStatistics(const std::shared_ptr<StatisticsFile>& statistics_file);
624+
Status RemoveStatistics(int64_t snapshot_id);
623625

624626
Result<std::unique_ptr<TableMetadata>> Build();
625627

@@ -1032,12 +1034,13 @@ Result<int32_t> TableMetadataBuilder::Impl::AddSchema(const Schema& schema,
10321034
return new_schema_id;
10331035
}
10341036

1035-
void TableMetadataBuilder::Impl::SetLocation(std::string_view location) {
1037+
Status TableMetadataBuilder::Impl::SetLocation(std::string_view location) {
10361038
if (location == metadata_.location) {
1037-
return;
1039+
return {};
10381040
}
10391041
metadata_.location = std::string(location);
10401042
changes_.push_back(std::make_unique<table::SetLocation>(std::string(location)));
1043+
return {};
10411044
}
10421045

10431046
Status TableMetadataBuilder::Impl::AddSnapshot(std::shared_ptr<Snapshot> snapshot) {
@@ -1173,6 +1176,45 @@ Status TableMetadataBuilder::Impl::SetRef(const std::string& name,
11731176
return {};
11741177
}
11751178

1179+
Status TableMetadataBuilder::Impl::SetStatistics(
1180+
const std::shared_ptr<StatisticsFile>& statistics_file) {
1181+
ICEBERG_CHECK(statistics_file != nullptr, "Cannot set null statistics file");
1182+
1183+
// Find and replace existing statistics for the same snapshot_id, or add new one
1184+
auto it = std::ranges::find_if(
1185+
metadata_.statistics,
1186+
[snapshot_id = statistics_file->snapshot_id](const auto& stat) {
1187+
return stat && stat->snapshot_id == snapshot_id;
1188+
});
1189+
1190+
if (it != metadata_.statistics.end()) {
1191+
*it = statistics_file;
1192+
} else {
1193+
metadata_.statistics.push_back(statistics_file);
1194+
}
1195+
1196+
changes_.push_back(std::make_unique<table::SetStatistics>(statistics_file));
1197+
return {};
1198+
}
1199+
1200+
Status TableMetadataBuilder::Impl::RemoveStatistics(int64_t snapshot_id) {
1201+
auto it = std::ranges::find_if(metadata_.statistics, [snapshot_id](const auto& stat) {
1202+
return stat && stat->snapshot_id == snapshot_id;
1203+
});
1204+
1205+
if (it == metadata_.statistics.end()) {
1206+
return {};
1207+
}
1208+
1209+
// Remove statistics for the given snapshot_id
1210+
std::erase_if(metadata_.statistics, [snapshot_id](const auto& stat) {
1211+
return stat && stat->snapshot_id == snapshot_id;
1212+
});
1213+
1214+
changes_.push_back(std::make_unique<table::RemoveStatistics>(snapshot_id));
1215+
return {};
1216+
}
1217+
11761218
std::unordered_set<int64_t> TableMetadataBuilder::Impl::IntermediateSnapshotIdSet(
11771219
int64_t current_snapshot_id) const {
11781220
std::unordered_set<int64_t> added_snapshot_ids;
@@ -1590,11 +1632,13 @@ TableMetadataBuilder& TableMetadataBuilder::SuppressHistoricalSnapshots() {
15901632

15911633
TableMetadataBuilder& TableMetadataBuilder::SetStatistics(
15921634
const std::shared_ptr<StatisticsFile>& statistics_file) {
1593-
throw IcebergError(std::format("{} not implemented", __FUNCTION__));
1635+
impl_->SetStatistics(statistics_file);
1636+
return *this;
15941637
}
15951638

15961639
TableMetadataBuilder& TableMetadataBuilder::RemoveStatistics(int64_t snapshot_id) {
1597-
throw IcebergError(std::format("{} not implemented", __FUNCTION__));
1640+
impl_->RemoveStatistics(snapshot_id);
1641+
return *this;
15981642
}
15991643

16001644
TableMetadataBuilder& TableMetadataBuilder::SetPartitionStatistics(

src/iceberg/table_update.cc

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "iceberg/exception.h"
2323
#include "iceberg/schema.h"
2424
#include "iceberg/sort_order.h"
25+
#include "iceberg/statistics_file.h"
2526
#include "iceberg/table_metadata.h"
2627
#include "iceberg/table_requirements.h"
2728

@@ -446,4 +447,50 @@ std::unique_ptr<TableUpdate> SetLocation::Clone() const {
446447
return std::make_unique<SetLocation>(location_);
447448
}
448449

450+
// SetStatistics
451+
452+
int64_t SetStatistics::snapshot_id() const { return statistics_file_->snapshot_id; }
453+
454+
void SetStatistics::ApplyTo(TableMetadataBuilder& builder) const {
455+
builder.SetStatistics(statistics_file_);
456+
}
457+
458+
void SetStatistics::GenerateRequirements(TableUpdateContext& context) const {
459+
// SetStatistics doesn't generate any requirements
460+
}
461+
462+
bool SetStatistics::Equals(const TableUpdate& other) const {
463+
if (other.kind() != Kind::kSetStatistics) {
464+
return false;
465+
}
466+
const auto& other_set = static_cast<const SetStatistics&>(other);
467+
return *statistics_file_ == *other_set.statistics_file_;
468+
}
469+
470+
std::unique_ptr<TableUpdate> SetStatistics::Clone() const {
471+
return std::make_unique<SetStatistics>(statistics_file_);
472+
}
473+
474+
// RemoveStatistics
475+
476+
void RemoveStatistics::ApplyTo(TableMetadataBuilder& builder) const {
477+
builder.RemoveStatistics(snapshot_id_);
478+
}
479+
480+
void RemoveStatistics::GenerateRequirements(TableUpdateContext& context) const {
481+
// RemoveStatistics doesn't generate any requirements
482+
}
483+
484+
bool RemoveStatistics::Equals(const TableUpdate& other) const {
485+
if (other.kind() != Kind::kRemoveStatistics) {
486+
return false;
487+
}
488+
const auto& other_remove = static_cast<const RemoveStatistics&>(other);
489+
return snapshot_id_ == other_remove.snapshot_id_;
490+
}
491+
492+
std::unique_ptr<TableUpdate> RemoveStatistics::Clone() const {
493+
return std::make_unique<RemoveStatistics>(snapshot_id_);
494+
}
495+
449496
} // namespace iceberg::table

src/iceberg/table_update.h

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ class ICEBERG_EXPORT TableUpdate {
5959
kSetProperties,
6060
kRemoveProperties,
6161
kSetLocation,
62+
kSetStatistics,
63+
kRemoveStatistics,
6264
};
6365

6466
virtual ~TableUpdate();
@@ -509,6 +511,53 @@ class ICEBERG_EXPORT SetLocation : public TableUpdate {
509511
std::string location_;
510512
};
511513

514+
/// \brief Represents setting statistics for a snapshot
515+
class ICEBERG_EXPORT SetStatistics : public TableUpdate {
516+
public:
517+
explicit SetStatistics(std::shared_ptr<StatisticsFile> statistics_file)
518+
: statistics_file_(std::move(statistics_file)) {}
519+
520+
int64_t snapshot_id() const;
521+
522+
const std::shared_ptr<StatisticsFile>& statistics_file() const {
523+
return statistics_file_;
524+
}
525+
526+
void ApplyTo(TableMetadataBuilder& builder) const override;
527+
528+
void GenerateRequirements(TableUpdateContext& context) const override;
529+
530+
Kind kind() const override { return Kind::kSetStatistics; }
531+
532+
bool Equals(const TableUpdate& other) const override;
533+
534+
std::unique_ptr<TableUpdate> Clone() const override;
535+
536+
private:
537+
std::shared_ptr<StatisticsFile> statistics_file_;
538+
};
539+
540+
/// \brief Represents removing statistics for a snapshot
541+
class ICEBERG_EXPORT RemoveStatistics : public TableUpdate {
542+
public:
543+
explicit RemoveStatistics(int64_t snapshot_id) : snapshot_id_(snapshot_id) {}
544+
545+
int64_t snapshot_id() const { return snapshot_id_; }
546+
547+
void ApplyTo(TableMetadataBuilder& builder) const override;
548+
549+
void GenerateRequirements(TableUpdateContext& context) const override;
550+
551+
Kind kind() const override { return Kind::kRemoveStatistics; }
552+
553+
bool Equals(const TableUpdate& other) const override;
554+
555+
std::unique_ptr<TableUpdate> Clone() const override;
556+
557+
private:
558+
int64_t snapshot_id_;
559+
};
560+
512561
} // namespace table
513562

514563
} // namespace iceberg

src/iceberg/test/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,8 @@ if(ICEBERG_BUILD_BUNDLE)
177177
update_partition_spec_test.cc
178178
update_properties_test.cc
179179
update_schema_test.cc
180-
update_sort_order_test.cc)
180+
update_sort_order_test.cc
181+
update_statistics_test.cc)
181182

182183
add_iceberg_test(data_writer_test USE_BUNDLE SOURCES data_writer_test.cc)
183184

0 commit comments

Comments
 (0)