Skip to content

Commit eff0f2b

Browse files
committed
feat: adopt partition summary
1 parent 7f7f85b commit eff0f2b

14 files changed

Lines changed: 174 additions & 51 deletions

src/iceberg/expression/literal.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,17 @@ bool Literal::IsAboveMax() const { return std::holds_alternative<AboveMax>(value
504504

505505
// A literal is null exactly when its variant is in the std::monostate state.
bool Literal::IsNull() const {
  const bool holds_no_value = std::holds_alternative<std::monostate>(value_);
  return holds_no_value;
}
506506

507+
bool Literal::IsNan() const {
508+
if (type_->type_id() == TypeId::kFloat) {
509+
auto val = std::get<float>(value_);
510+
return std::isnan(val);
511+
} else if (type_->type_id() == TypeId::kDouble) {
512+
auto val = std::get<double>(value_);
513+
return std::isnan(val);
514+
}
515+
return false;
516+
}
517+
507518
// LiteralCaster implementation
508519

509520
Result<Literal> LiteralCaster::CastTo(const Literal& literal,

src/iceberg/expression/literal.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,14 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
150150
/// \return true if this literal represents a BelowMin value, false otherwise
151151
bool IsBelowMin() const;
152152

153-
/// Check if this literal is null.
153+
/// \brief Check if this literal is null.
154154
/// \return true if this literal is null, false otherwise
155155
bool IsNull() const;
156156

157+
/// \brief Check if this literal is NaN (Not a Number).
158+
/// \return true if this literal is NaN, false otherwise
159+
bool IsNan() const;
160+
157161
std::string ToString() const override;
158162

159163
private:

src/iceberg/manifest_adapter.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
#include <memory>
2626
#include <optional>
2727
#include <unordered_map>
28-
#include <unordered_set>
2928
#include <vector>
3029

3130
#include "iceberg/arrow_c_data.h"
@@ -95,6 +94,13 @@ class ICEBERG_EXPORT ManifestEntryAdapter : public ManifestAdapter {
9594
std::shared_ptr<Schema> current_schema_;
9695
std::shared_ptr<Schema> manifest_schema_;
9796
const ManifestContent content_;
97+
int32_t add_files_count_{0};
98+
int32_t existing_files_count_{0};
99+
int32_t delete_files_count_{0};
100+
int64_t add_rows_count_{0L};
101+
int64_t existing_rows_count_{0L};
102+
int64_t delete_rows_count_{0L};
103+
std::optional<int64_t> min_data_sequence_number_{std::nullopt};
98104
};
99105

100106
/// \brief Adapter for appending a list of `ManifestFile`s to an `ArrowArray`.

src/iceberg/manifest_entry.h

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,15 +57,6 @@ ICEBERG_EXPORT constexpr Result<ManifestStatus> ManifestStatusFromInt(
5757
}
5858
}
5959

60-
enum class ManifestContent {
61-
kData = 0,
62-
kDeletes = 1,
63-
};
64-
65-
ICEBERG_EXPORT constexpr std::string_view ToString(ManifestContent content) noexcept;
66-
ICEBERG_EXPORT constexpr Result<ManifestContent> ManifestContentFromString(
67-
std::string_view str) noexcept;
68-
6960
/// \brief DataFile carries data file path, partition tuple, metrics, ...
7061
struct ICEBERG_EXPORT DataFile {
7162
/// \brief Content of a data file
@@ -315,6 +306,11 @@ struct ICEBERG_EXPORT ManifestEntry {
315306
inline static const SchemaField kFileSequenceNumber =
316307
SchemaField::MakeOptional(4, "file_sequence_number", iceberg::int64());
317308

309+
/// \brief Check if this manifest entry is alive (its status is added or existing).
/// \return true if the status is kAdded or kExisting, false otherwise
310+
constexpr bool IsAlive() const {
311+
return status == ManifestStatus::kAdded || status == ManifestStatus::kExisting;
312+
}
313+
318314
bool operator==(const ManifestEntry& other) const;
319315

320316
static std::shared_ptr<StructType> TypeFromPartitionType(

src/iceberg/manifest_list.h

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
#include <optional>
2626
#include <string>
2727
#include <string_view>
28-
#include <utility>
2928

3029
#include "iceberg/iceberg_export.h"
3130
#include "iceberg/partition_spec.h"
@@ -72,17 +71,17 @@ struct ICEBERG_EXPORT PartitionFieldSummary {
7271
static const StructType& Type();
7372
};
7473

74+
/// \brief The type of files tracked by a manifest: either data files or delete files.
75+
/// The value is 0 (kData) for all v1 manifests.
76+
enum class ManifestContent {
77+
/// The manifest tracks data files.
78+
kData = 0,
79+
/// The manifest tracks delete files.
80+
kDeletes = 1,
81+
};
82+
7583
/// \brief Entry in a manifest list.
7684
struct ICEBERG_EXPORT ManifestFile {
77-
/// \brief The type of files tracked by the manifest, either data or delete files; 0 for
78-
/// all v1 manifests
79-
enum class Content {
80-
/// The manifest content is data.
81-
kData = 0,
82-
/// The manifest content is deletes.
83-
kDeletes = 1,
84-
};
85-
8685
/// Field id: 500
8786
/// Location of the manifest file
8887
std::string manifest_path;
@@ -96,7 +95,7 @@ struct ICEBERG_EXPORT ManifestFile {
9695
/// Field id: 517
9796
/// The type of files tracked by the manifest, either data or delete files; 0 for all v1
9897
/// manifests
99-
Content content = Content::kData;
98+
ManifestContent content = ManifestContent::kData;
10099
/// Field id: 515
101100
/// The sequence number when the manifest was added to the table; use 0 when reading v1
102101
/// manifest lists
@@ -218,21 +217,20 @@ struct ICEBERG_EXPORT ManifestList {
218217
};
219218

220219
/// \brief Get the string name of the given manifest content type
221-
ICEBERG_EXPORT constexpr std::string_view ToString(ManifestFile::Content type) noexcept {
220+
ICEBERG_EXPORT constexpr std::string_view ToString(ManifestContent type) noexcept {
222221
switch (type) {
223-
case ManifestFile::Content::kData:
222+
case ManifestContent::kData:
224223
return "data";
225-
case ManifestFile::Content::kDeletes:
224+
case ManifestContent::kDeletes:
226225
return "deletes";
227226
}
228-
std::unreachable();
229227
}
230228

231229
/// \brief Get the manifest content type that corresponds to the given name
232-
ICEBERG_EXPORT constexpr Result<ManifestFile::Content> ManifestFileContentFromString(
230+
ICEBERG_EXPORT constexpr Result<ManifestContent> ManifestContentFromString(
233231
std::string_view str) noexcept {
234-
if (str == "data") return ManifestFile::Content::kData;
235-
if (str == "deletes") return ManifestFile::Content::kDeletes;
232+
if (str == "data") return ManifestContent::kData;
233+
if (str == "deletes") return ManifestContent::kDeletes;
236234
return InvalidArgument("Invalid manifest content type: {}", str);
237235
}
238236

src/iceberg/manifest_reader_internal.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ Result<std::vector<ManifestFile>> ParseManifestList(ArrowSchema* schema,
237237
break;
238238
case ManifestFileField::kContent:
239239
PARSE_PRIMITIVE_FIELD(manifest_files[row_idx].content, view_of_column,
240-
ManifestFile::Content);
240+
ManifestContent);
241241
break;
242242
case ManifestFileField::kSequenceNumber:
243243
PARSE_PRIMITIVE_FIELD(manifest_files[row_idx].sequence_number, view_of_column,
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/partition_summary_internal.h"
21+
22+
#include "iceberg/expression/literal.h"
23+
24+
namespace iceberg {
25+
26+
// Fold one partition value into the running statistics.
//
// Returns InvalidArgument when the literal's type id differs from the type id
// this instance was created with. A null value only sets the contains-null
// flag and a NaN value only sets the contains-NaN flag; neither takes part in
// the bounds. Any other value widens the lower/upper bound as needed.
Status PartitionFieldStats::Update(const Literal& value) {
  const bool same_type = type_->type_id() == value.type()->type_id();
  if (!same_type) {
    return InvalidArgument("value is not compatible with type");
  }

  if (value.IsNull()) {
    contains_null_ = true;
  } else if (value.IsNan()) {
    contains_nan_ = true;
  } else {
    // The first regular value seeds both bounds; later values only widen them.
    const bool lowers_min = !lower_bound_.has_value() || value < *lower_bound_;
    if (lowers_min) {
      lower_bound_ = value;
    }
    const bool raises_max = !upper_bound_.has_value() || value > *upper_bound_;
    if (raises_max) {
      upper_bound_ = value;
    }
  }
  return {};
}
49+
50+
} // namespace iceberg
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
#include "iceberg/expression/literal.h"
23+
#include "iceberg/result.h"
24+
#include "iceberg/type_fwd.h"
25+
26+
namespace iceberg {
27+
28+
class PartitionFieldStats {
29+
public:
30+
explicit PartitionFieldStats(const std::shared_ptr<PrimitiveType>& type)
31+
: type_(type) {}
32+
33+
Status Update(const Literal& value);
34+
35+
PartitionFieldSummary Finish() const {
36+
PartitionFieldSummary summary;
37+
summary.contains_null = contains_null_;
38+
summary.contains_nan = contains_nan_;
39+
if (lower_bound_) {
40+
summary.lower_bound = lower_bound_->Serialize().value();
41+
}
42+
if (upper_bound_) {
43+
summary.upper_bound = upper_bound_->Serialize().value();
44+
}
45+
return summary;
46+
}
47+
48+
private:
49+
std::shared_ptr<PrimitiveType> type_{nullptr};
50+
bool contains_null_{false};
51+
bool contains_nan_{false};
52+
std::optional<Literal> lower_bound_;
53+
std::optional<Literal> upper_bound_;
54+
};
55+
56+
// NOTE(review): this class has no members in this revision; presumably a
// placeholder to be filled in by a later change. Confirm before relying on it.
class PartitionSummary {};
57+
58+
} // namespace iceberg

src/iceberg/test/manifest_list_reader_writer_test.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ class ManifestListReaderWriterTestBase : public TempFileTestBase {
6464
for (const auto& manifest : manifest_files) {
6565
ASSERT_EQ(manifest.partition_spec_id, 0);
6666
ASSERT_TRUE(manifest.partitions.empty());
67-
ASSERT_EQ(manifest.content, ManifestFile::Content::kData);
67+
ASSERT_EQ(manifest.content, ManifestContent::kData);
6868
}
6969
}
7070

@@ -242,7 +242,7 @@ class ManifestListReaderWriterV2Test : public ManifestListReaderWriterTestBase {
242242
manifest_file.manifest_path = test_dir_prefix + paths[i];
243243
manifest_file.manifest_length = file_size[i];
244244
manifest_file.partition_spec_id = 0;
245-
manifest_file.content = ManifestFile::Content::kData;
245+
manifest_file.content = ManifestContent::kData;
246246
manifest_file.sequence_number = 4 - i;
247247
manifest_file.min_sequence_number = 4 - i;
248248
manifest_file.added_snapshot_id = snapshot_id[i];
@@ -282,7 +282,7 @@ class ManifestListReaderWriterV2Test : public ManifestListReaderWriterTestBase {
282282
manifest_file.manifest_path = test_dir_prefix + paths[i];
283283
manifest_file.manifest_length = file_size[i];
284284
manifest_file.partition_spec_id = 0;
285-
manifest_file.content = ManifestFile::Content::kData;
285+
manifest_file.content = ManifestContent::kData;
286286
manifest_file.sequence_number = 4 - i;
287287
manifest_file.min_sequence_number = 4 - i;
288288
manifest_file.added_snapshot_id = snapshot_id[i];

src/iceberg/test/manifest_list_versions_test.cc

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ const static auto kTestManifest = ManifestFile{
5959
.manifest_path = kPath,
6060
.manifest_length = kLength,
6161
.partition_spec_id = kSpecId,
62-
.content = ManifestFile::Content::kData,
62+
.content = ManifestContent::kData,
6363
.sequence_number = kSeqNum,
6464
.min_sequence_number = kMinSeqNum,
6565
.added_snapshot_id = kSnapshotId,
@@ -78,7 +78,7 @@ const static auto kDeleteManifest = ManifestFile{
7878
.manifest_path = kPath,
7979
.manifest_length = kLength,
8080
.partition_spec_id = kSpecId,
81-
.content = ManifestFile::Content::kDeletes,
81+
.content = ManifestContent::kDeletes,
8282
.sequence_number = kSeqNum,
8383
.min_sequence_number = kMinSeqNum,
8484
.added_snapshot_id = kSnapshotId,
@@ -227,7 +227,7 @@ TEST_F(TestManifestListVersions, TestV1Write) {
227227
EXPECT_EQ(manifest.manifest_path, kPath);
228228
EXPECT_EQ(manifest.manifest_length, kLength);
229229
EXPECT_EQ(manifest.partition_spec_id, kSpecId);
230-
EXPECT_EQ(manifest.content, ManifestFile::Content::kData);
230+
EXPECT_EQ(manifest.content, ManifestContent::kData);
231231
EXPECT_EQ(manifest.added_snapshot_id, kSnapshotId);
232232
EXPECT_EQ(manifest.added_files_count, kAddedFiles);
233233
EXPECT_EQ(manifest.existing_files_count, kExistingFiles);
@@ -247,7 +247,7 @@ TEST_F(TestManifestListVersions, TestV2Write) {
247247
EXPECT_EQ(manifest.manifest_path, kPath);
248248
EXPECT_EQ(manifest.manifest_length, kLength);
249249
EXPECT_EQ(manifest.partition_spec_id, kSpecId);
250-
EXPECT_EQ(manifest.content, ManifestFile::Content::kData);
250+
EXPECT_EQ(manifest.content, ManifestContent::kData);
251251
EXPECT_EQ(manifest.sequence_number, kSeqNum);
252252
EXPECT_EQ(manifest.min_sequence_number, kMinSeqNum);
253253
EXPECT_EQ(manifest.added_snapshot_id, kSnapshotId);
@@ -266,7 +266,7 @@ TEST_F(TestManifestListVersions, TestV3Write) {
266266
EXPECT_EQ(manifest.manifest_path, kPath);
267267
EXPECT_EQ(manifest.manifest_length, kLength);
268268
EXPECT_EQ(manifest.partition_spec_id, kSpecId);
269-
EXPECT_EQ(manifest.content, ManifestFile::Content::kData);
269+
EXPECT_EQ(manifest.content, ManifestContent::kData);
270270
EXPECT_EQ(manifest.sequence_number, kSeqNum);
271271
EXPECT_EQ(manifest.min_sequence_number, kMinSeqNum);
272272
EXPECT_EQ(manifest.added_snapshot_id, kSnapshotId);
@@ -292,7 +292,7 @@ TEST_F(TestManifestListVersions, TestV3WriteFirstRowIdAssignment) {
292292
EXPECT_EQ(manifest.manifest_path, kPath);
293293
EXPECT_EQ(manifest.manifest_length, kLength);
294294
EXPECT_EQ(manifest.partition_spec_id, kSpecId);
295-
EXPECT_EQ(manifest.content, ManifestFile::Content::kData);
295+
EXPECT_EQ(manifest.content, ManifestContent::kData);
296296
EXPECT_EQ(manifest.sequence_number, kSeqNum);
297297
EXPECT_EQ(manifest.min_sequence_number, kMinSeqNum);
298298
EXPECT_EQ(manifest.added_snapshot_id, kSnapshotId);
@@ -323,7 +323,7 @@ TEST_F(TestManifestListVersions, TestV3WriteMixedRowIdAssignment) {
323323
EXPECT_EQ(manifest.manifest_path, kPath);
324324
EXPECT_EQ(manifest.manifest_length, kLength);
325325
EXPECT_EQ(manifest.partition_spec_id, kSpecId);
326-
EXPECT_EQ(manifest.content, ManifestFile::Content::kData);
326+
EXPECT_EQ(manifest.content, ManifestContent::kData);
327327
EXPECT_EQ(manifest.sequence_number, kSeqNum);
328328
EXPECT_EQ(manifest.min_sequence_number, kMinSeqNum);
329329
EXPECT_EQ(manifest.added_snapshot_id, kSnapshotId);
@@ -442,7 +442,7 @@ TEST_F(TestManifestListVersions, TestManifestsPartitionSummary) {
442442
.manifest_path = kPath,
443443
.manifest_length = kLength,
444444
.partition_spec_id = kSpecId,
445-
.content = ManifestFile::Content::kData,
445+
.content = ManifestContent::kData,
446446
.sequence_number = kSeqNum,
447447
.min_sequence_number = kMinSeqNum,
448448
.added_snapshot_id = kSnapshotId,

0 commit comments

Comments
 (0)