Skip to content

Commit 5fa02cb

Browse files
author
Innocent
committed
feat: metrics reporting for scan and commit
1 parent 721e529 commit 5fa02cb

File tree

6 files changed

+496
-0
lines changed

6 files changed

+496
-0
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ set(ICEBERG_SOURCES
5858
manifest/v3_metadata.cc
5959
metadata_columns.cc
6060
metrics_config.cc
61+
metrics_reporters.cc
6162
name_mapping.cc
6263
partition_field.cc
6364
partition_spec.cc

src/iceberg/metrics_reporter.h

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
#include <chrono>
23+
#include <memory>
24+
#include <string>
25+
#include <string_view>
26+
#include <utility>
27+
#include <variant>
28+
29+
#include "iceberg/iceberg_export.h"
30+
31+
namespace iceberg {
32+
33+
/// \brief Duration type for metrics reporting in milliseconds.
34+
using DurationMs = std::chrono::milliseconds;
35+
36+
/// \brief Report generated after a table scan operation.
37+
///
38+
/// Contains metrics about the planning and execution of a table scan,
39+
/// including information about manifests and data files processed.
40+
struct ICEBERG_EXPORT ScanReport {
41+
/// \brief The fully qualified name of the table that was scanned.
42+
std::string table_name;
43+
44+
/// \brief Snapshot ID that was scanned, if available.
45+
int64_t snapshot_id = -1;
46+
47+
/// \brief Filter expression used in the scan, if any.
48+
std::string filter;
49+
50+
/// \brief Schema ID.
51+
int32_t schema_id = -1;
52+
53+
/// \brief Total duration of the entire scan operation.
54+
DurationMs total_duration{0};
55+
56+
/// \brief Duration spent planning the scan.
57+
DurationMs total_planning_duration{0};
58+
59+
/// \brief Number of data files in the scan result.
60+
int64_t result_data_files = 0;
61+
62+
/// \brief Number of delete files in the scan result.
63+
int64_t result_delete_files = 0;
64+
65+
/// \brief Total number of data manifests.
66+
int64_t total_data_manifests = 0;
67+
68+
/// \brief Number of data manifests that were skipped.
69+
int64_t skipped_data_files = 0;
70+
71+
/// \brief Number of data manifests that were skipped.
72+
int64_t skipped_delete_files = 0;
73+
74+
/// \brief Number of data manifests that were scanned.
75+
int64_t scanned_data_manifests = 0;
76+
77+
/// \brief Number of data manifests that were skipped due to filtering.
78+
int64_t skipped_data_manifests = 0;
79+
80+
/// \brief Total number of delete manifests.
81+
int64_t total_delete_manifests = 0;
82+
83+
/// \brief Number of delete manifests that were scanned.
84+
int64_t scanned_delete_manifests = 0;
85+
86+
/// \brief Number of delete manifests that were skipped.
87+
int64_t skipped_delete_manifests = 0;
88+
};
89+
90+
/// \brief Report generated after a commit operation.
91+
///
92+
/// Contains metrics about the changes made in a commit, including
93+
/// files added/removed and retry information.
94+
struct ICEBERG_EXPORT CommitReport {
95+
/// \brief The fully qualified name of the table that was modified.
96+
std::string table_name;
97+
98+
/// \brief The snapshot ID created by this commit.
99+
int64_t snapshot_id = -1;
100+
101+
/// \brief The sequence number assigned to this commit.
102+
int64_t sequence_number = -1;
103+
104+
/// \brief The operation that was performed (append, overwrite, delete, etc.).
105+
std::string operation;
106+
107+
/// \brief Number of commit attempts (1 = success on first try).
108+
int32_t attempts = 1;
109+
110+
/// \brief Number of data files added in this commit.
111+
int64_t added_data_files = 0;
112+
113+
/// \brief Number of data files removed in this commit.
114+
int64_t removed_data_files = 0;
115+
116+
/// \brief Total number of data files after this commit.
117+
int64_t total_data_files = 0;
118+
119+
/// \brief Number of delete files added in this commit.
120+
int64_t added_delete_files = 0;
121+
122+
/// \brief Number of delete files removed in this commit.
123+
int64_t removed_delete_files = 0;
124+
125+
/// \brief Total number of delete files after this commit.
126+
int64_t total_delete_files = 0;
127+
128+
/// \brief Number of records added in this commit.
129+
int64_t added_records = 0;
130+
131+
/// \brief Number of records removed in this commit.
132+
int64_t removed_records = 0;
133+
134+
/// \brief Size in bytes of files added.
135+
int64_t added_files_size = 0;
136+
137+
/// \brief Size in bytes of files removed.
138+
int64_t removed_files_size = 0;
139+
};
140+
141+
/// \brief The type of a metrics report.
142+
enum class MetricsReportType {
143+
kScanReport,
144+
kCommitReport,
145+
};
146+
147+
/// \brief Get the string representation of a metrics report type.
148+
ICEBERG_EXPORT constexpr std::string_view ToString(MetricsReportType type) noexcept {
149+
switch (type) {
150+
case MetricsReportType::kScanReport:
151+
return "scan";
152+
case MetricsReportType::kCommitReport:
153+
return "commit";
154+
}
155+
std::unreachable();
156+
}
157+
158+
/// \brief A metrics report, which can be either a ScanReport or CommitReport.
159+
///
160+
/// This variant type allows handling both report types uniformly through
161+
/// the MetricsReporter interface.
162+
using MetricsReport = std::variant<ScanReport, CommitReport>;
163+
164+
/// \brief Get the type of a metrics report.
165+
///
166+
/// \param report The metrics report to get the type of.
167+
/// \return The type of the metrics report.
168+
ICEBERG_EXPORT inline MetricsReportType GetReportType(const MetricsReport& report) {
169+
return std::visit(
170+
[](const auto& r) -> MetricsReportType {
171+
using T = std::decay_t<decltype(r)>;
172+
if constexpr (std::is_same_v<T, ScanReport>) {
173+
return MetricsReportType::kScanReport;
174+
} else {
175+
return MetricsReportType::kCommitReport;
176+
}
177+
},
178+
report);
179+
}
180+
181+
/// \brief Interface for reporting metrics from Iceberg operations.
182+
///
183+
/// Implementations of this interface can be used to collect and report
184+
/// metrics about scan and commit operations. Common implementations include
185+
/// logging reporters, metrics collectors, and the noop reporter for testing.
186+
class ICEBERG_EXPORT MetricsReporter {
187+
public:
188+
virtual ~MetricsReporter() = default;
189+
190+
/// \brief Report a metrics report.
191+
///
192+
/// Implementations should handle the report according to their purpose
193+
/// (e.g., logging, sending to a metrics service, etc.).
194+
///
195+
/// \param report The metrics report to process.
196+
virtual void Report(const MetricsReport& report) = 0;
197+
};
198+
199+
} // namespace iceberg

src/iceberg/metrics_reporters.cc

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/metrics_reporters.h"
21+
22+
#include <unordered_set>
23+
24+
#include "iceberg/util/string_util.h"
25+
26+
namespace iceberg {
27+
28+
namespace {
29+
30+
/// \brief Registry type for MetricsReporter factories with heterogeneous lookup support.
31+
using MetricsReporterRegistry = std::unordered_map<std::string, MetricsReporterFactory>;
32+
33+
/// \brief Get the set of known metrics reporter types.
34+
const std::unordered_set<std::string>& DefaultReporterTypes() {
35+
static const std::unordered_set<std::string> kReporterTypes = {
36+
std::string(kMetricsReporterTypeNoop),
37+
};
38+
return kReporterTypes;
39+
}
40+
41+
/// \brief Infer the reporter type from properties.
42+
std::string InferReporterType(
43+
const std::unordered_map<std::string, std::string>& properties) {
44+
auto it = properties.find(std::string(kMetricsReporterType));
45+
if (it != properties.end() && !it->second.empty()) {
46+
return StringUtils::ToLower(it->second);
47+
}
48+
// Default to noop reporter
49+
return std::string(kMetricsReporterTypeNoop);
50+
}
51+
52+
/// \brief Metrics reporter that does nothing.
53+
///
54+
/// This is the default reporter used when no reporter is configured.
55+
/// It silently discards all reports.
56+
class NoopMetricsReporter : public MetricsReporter {
57+
public:
58+
static Result<std::unique_ptr<MetricsReporter>> Make(
59+
[[maybe_unused]] std::string_view name,
60+
[[maybe_unused]] const std::unordered_map<std::string, std::string>& properties) {
61+
return std::make_unique<NoopMetricsReporter>();
62+
}
63+
64+
void Report([[maybe_unused]] const MetricsReport& report) override {
65+
// Intentionally empty - noop implementation discards all reports
66+
}
67+
};
68+
69+
/// \brief Template helper to create factory functions for reporter types.
70+
template <typename T>
71+
MetricsReporterFactory MakeReporterFactory() {
72+
return
73+
[](std::string_view name, const std::unordered_map<std::string, std::string>& props)
74+
-> Result<std::unique_ptr<MetricsReporter>> { return T::Make(name, props); };
75+
}
76+
77+
/// \brief Create the default registry with built-in reporters.
78+
MetricsReporterRegistry CreateDefaultRegistry() {
79+
return {
80+
{std::string(kMetricsReporterTypeNoop), MakeReporterFactory<NoopMetricsReporter>()},
81+
};
82+
}
83+
84+
/// \brief Get the global registry of metrics reporter factories.
85+
MetricsReporterRegistry& GetRegistry() {
86+
static MetricsReporterRegistry registry = CreateDefaultRegistry();
87+
return registry;
88+
}
89+
90+
} // namespace
91+
92+
void MetricsReporters::Register(std::string_view reporter_type,
93+
MetricsReporterFactory factory) {
94+
GetRegistry()[StringUtils::ToLower(reporter_type)] = std::move(factory);
95+
}
96+
97+
Result<std::unique_ptr<MetricsReporter>> MetricsReporters::Load(
98+
std::string_view name,
99+
const std::unordered_map<std::string, std::string>& properties) {
100+
std::string reporter_type = InferReporterType(properties);
101+
102+
auto& registry = GetRegistry();
103+
auto it = registry.find(reporter_type);
104+
if (it == registry.end()) {
105+
if (DefaultReporterTypes().contains(reporter_type)) {
106+
return NotImplemented("Metrics reporter type '{}' is not yet supported",
107+
reporter_type);
108+
}
109+
return InvalidArgument("Unknown metrics reporter type: '{}'", reporter_type);
110+
}
111+
112+
return it->second(name, properties);
113+
}
114+
115+
} // namespace iceberg

0 commit comments

Comments
 (0)