Skip to content

Commit a144762

Browse files
committed
address feedback
1 parent 33a736b commit a144762

File tree

2 files changed

+28
-18
lines changed

2 files changed

+28
-18
lines changed

src/iceberg/manifest/manifest_reader.cc

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ namespace iceberg {
4545

4646
namespace {
4747

48+
// TODO(gangwu): refactor these macros with template functions.
4849
#define PARSE_PRIMITIVE_FIELD(item, array_view, type) \
4950
for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) { \
5051
if (!ArrowArrayViewIsNull(array_view, row_idx)) { \
@@ -145,30 +146,30 @@ Status ParsePartitionFieldSummaryList(ArrowArrayView* view_of_column,
145146
if (view_of_column->storage_type != ArrowType::NANOARROW_TYPE_LIST) {
146147
return InvalidManifestList("partitions field should be a list.");
147148
}
148-
auto view_of_list_iterm = view_of_column->children[0];
149-
// view_of_list_iterm is struct<PartitionFieldSummary>
150-
if (view_of_list_iterm->storage_type != ArrowType::NANOARROW_TYPE_STRUCT) {
151-
return InvalidManifestList("partitions list field should be a list.");
149+
auto view_of_list_item = view_of_column->children[0];
150+
// view_of_list_item is struct<PartitionFieldSummary>
151+
if (view_of_list_item->storage_type != ArrowType::NANOARROW_TYPE_STRUCT) {
152+
return InvalidManifestList("partitions list item should be a struct.");
152153
}
153-
if (view_of_list_iterm->n_children != 4) {
154+
if (view_of_list_item->n_children != 4) {
154155
return InvalidManifestList("PartitionFieldSummary should have 4 fields.");
155156
}
156-
if (view_of_list_iterm->children[0]->storage_type != ArrowType::NANOARROW_TYPE_BOOL) {
157+
if (view_of_list_item->children[0]->storage_type != ArrowType::NANOARROW_TYPE_BOOL) {
157158
return InvalidManifestList("contains_null should have be bool type column.");
158159
}
159-
auto contains_null = view_of_list_iterm->children[0];
160-
if (view_of_list_iterm->children[1]->storage_type != ArrowType::NANOARROW_TYPE_BOOL) {
160+
auto contains_null = view_of_list_item->children[0];
161+
if (view_of_list_item->children[1]->storage_type != ArrowType::NANOARROW_TYPE_BOOL) {
161162
return InvalidManifestList("contains_nan should have be bool type column.");
162163
}
163-
auto contains_nan = view_of_list_iterm->children[1];
164-
if (view_of_list_iterm->children[2]->storage_type != ArrowType::NANOARROW_TYPE_BINARY) {
164+
auto contains_nan = view_of_list_item->children[1];
165+
if (view_of_list_item->children[2]->storage_type != ArrowType::NANOARROW_TYPE_BINARY) {
165166
return InvalidManifestList("lower_bound should have be binary type column.");
166167
}
167-
auto lower_bound_list = view_of_list_iterm->children[2];
168-
if (view_of_list_iterm->children[3]->storage_type != ArrowType::NANOARROW_TYPE_BINARY) {
168+
auto lower_bound_list = view_of_list_item->children[2];
169+
if (view_of_list_item->children[3]->storage_type != ArrowType::NANOARROW_TYPE_BINARY) {
169170
return InvalidManifestList("upper_bound should have be binary type column.");
170171
}
171-
auto upper_bound_list = view_of_list_iterm->children[3];
172+
auto upper_bound_list = view_of_list_item->children[3];
172173
for (int64_t manifest_idx = 0; manifest_idx < manifest_count; manifest_idx++) {
173174
auto offset = ArrowArrayViewListChildOffset(view_of_column, manifest_idx);
174175
auto next_offset = ArrowArrayViewListChildOffset(view_of_column, manifest_idx + 1);
@@ -303,8 +304,8 @@ Result<std::vector<ManifestFile>> ParseManifestList(ArrowSchema* schema,
303304
return manifest_files;
304305
}
305306

306-
Status ParseLiteral(ArrowArrayView* view_of_partition, int64_t row_idx,
307-
std::vector<ManifestEntry>& manifest_entries) {
307+
Status ParsePartitionValues(ArrowArrayView* view_of_partition, int64_t row_idx,
308+
std::vector<ManifestEntry>& manifest_entries) {
308309
switch (view_of_partition->storage_type) {
309310
case ArrowType::NANOARROW_TYPE_BOOL: {
310311
auto value = ArrowArrayViewGetUIntUnsafe(view_of_partition, row_idx);
@@ -395,7 +396,7 @@ Status ParseDataFile(const std::shared_ptr<StructType>& data_file_schema,
395396
break;
396397
}
397398
ICEBERG_RETURN_UNEXPECTED(
398-
ParseLiteral(view_of_partition, row_idx, manifest_entries));
399+
ParsePartitionValues(view_of_partition, row_idx, manifest_entries));
399400
}
400401
}
401402
}
@@ -568,7 +569,7 @@ bool RequireStatsProjection(const std::shared_ptr<Expression>& row_filter,
568569
if (columns.empty()) {
569570
return false;
570571
}
571-
const std::unordered_set<std::string> selected(columns.cbegin(), columns.cend());
572+
const std::unordered_set<std::string_view> selected(columns.cbegin(), columns.cend());
572573
if (selected.contains(ManifestReader::kAllColumns)) {
573574
return false;
574575
}

src/iceberg/manifest/manifest_reader.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ namespace iceberg {
3737
class ICEBERG_EXPORT ManifestReader {
3838
public:
3939
/// \brief Special value to select all columns from manifest files.
40-
inline static const std::string kAllColumns = "*";
40+
static constexpr std::string_view kAllColumns = "*";
4141

4242
virtual ~ManifestReader() = default;
4343

@@ -50,16 +50,25 @@ class ICEBERG_EXPORT ManifestReader {
5050
virtual Result<std::vector<ManifestEntry>> LiveEntries() = 0;
5151

5252
/// \brief Select specific columns of data file to read from the manifest entries.
53+
///
54+
/// \note Column names should match the names in `DataFile` schema. Unmatched names
55+
/// will be ignored.
5356
virtual ManifestReader& Select(const std::vector<std::string>& columns) = 0;
5457

5558
/// \brief Filter manifest entries by partition filter.
59+
///
60+
/// \note Unlike the Java implementation, this method does not combine new expressions
61+
/// with existing ones. Each call replaces the previous partition filter.
5662
virtual ManifestReader& FilterPartitions(std::shared_ptr<Expression> expr) = 0;
5763

5864
/// \brief Filter manifest entries to a specific set of partitions.
5965
virtual ManifestReader& FilterPartitions(
6066
std::shared_ptr<class PartitionSet> partition_set) = 0;
6167

6268
/// \brief Filter manifest entries by row-level filter.
69+
///
70+
/// \note Unlike the Java implementation, this method does not combine new expressions
71+
/// with existing ones. Each call replaces the previous row filter.
6372
virtual ManifestReader& FilterRows(std::shared_ptr<Expression> expr) = 0;
6473

6574
/// \brief Set case sensitivity for column name matching.

0 commit comments

Comments
 (0)