Skip to content

Commit b167aed

Browse files
committed
fix: review comments
1 parent 8ac5ab2 commit b167aed

6 files changed

Lines changed: 116 additions & 54 deletions

File tree

src/iceberg/sort_order.cc

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -86,24 +86,21 @@ bool SortOrder::Equals(const SortOrder& other) const {
8686
return order_id_ == other.order_id_ && fields_ == other.fields_;
8787
}
8888

89-
bool SortOrder::IsBoundToSchema(const Schema& schema) const {
89+
Status SortOrder::Validate(const Schema& schema) const {
9090
for (const auto& field : fields_) {
9191
auto schema_field = schema.FindFieldById(field.source_id());
9292
if (!schema_field.has_value() || schema_field.value() == std::nullopt) {
93-
return false;
93+
return InvalidArgument("Cannot find schema field for sort field: {}", field);
9494
}
9595

9696
const auto& source_type = schema_field.value().value().get().type();
97-
if (!source_type->is_primitive()) {
98-
return false;
99-
}
10097

101-
auto result = field.transform()->ResultType(source_type);
102-
if (!result) {
103-
return false;
98+
if (!field.transform()->CanTransform(*source_type)) {
99+
return InvalidArgument("Cannot transform schema field type {} with transform {}",
100+
source_type->ToString(), field.transform()->ToString());
104101
}
105102
}
106-
return true;
103+
return {};
107104
}
108105

109106
Result<std::unique_ptr<SortOrder>> SortOrder::Make(const Schema& schema, int32_t sort_id,
@@ -123,13 +120,10 @@ Result<std::unique_ptr<SortOrder>> SortOrder::Make(const Schema& schema, int32_t
123120
}
124121

125122
const auto& source_type = schema_field.value().get().type();
126-
127-
if (!source_type->is_primitive()) {
128-
return InvalidArgument("Cannot sort by non-primitive source field: {}",
129-
*source_type);
123+
if (field.transform()->CanTransform(*source_type) == false) {
124+
return InvalidArgument("Cannot transform schema field type {} with transform {}",
125+
source_type->ToString(), field.transform()->ToString());
130126
}
131-
132-
ICEBERG_RETURN_UNEXPECTED(field.transform()->ResultType(source_type));
133127
}
134128

135129
return std::make_unique<SortOrder>(sort_id, std::move(fields));

src/iceberg/sort_order.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,10 @@ class ICEBERG_EXPORT SortOrder : public util::Formattable {
7272
return lhs.Equals(rhs);
7373
}
7474

75-
/// \brief Checks whether the sort order is bound to the given schema.
76-
/// \param schema The schema to check against.
77-
/// \return true if the sort order is valid for the given schema.
78-
bool IsBoundToSchema(const Schema& schema) const;
75+
/// \brief Validates the sort order against a schema.
76+
/// \param schema The schema to validate against.
77+
/// \return Error status if the sort order is not bound to the schema.
78+
Status Validate(const Schema& schema) const;
7979

8080
/// \brief Create a SortOrder.
8181
/// \param schema The schema to bind the sort order to.

src/iceberg/test/sort_order_test.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ TEST_F(SortOrderMakeTest, MakeInvalidSortOrderNonPrimitiveField) {
231231
auto sort_order = SortOrder::Make(
232232
schema_with_struct, 1, std::vector<SortField>{*sort_field1_, sort_field_invalid});
233233
EXPECT_THAT(sort_order, IsError(ErrorKind::kInvalidArgument));
234-
EXPECT_THAT(sort_order, HasErrorMessage("Cannot sort by non-primitive source field"));
234+
EXPECT_THAT(sort_order, HasErrorMessage("Cannot transform schema field type"));
235235
}
236236

237237
TEST_F(SortOrderMakeTest, MakeInvalidSortOrderFieldNotInSchema) {
@@ -251,7 +251,10 @@ TEST_F(SortOrderMakeTest, MakeUnboundSortOrder) {
251251
auto sort_order =
252252
SortOrder::Make(1, std::vector<SortField>{*sort_field1_, sort_field_invalid});
253253
ASSERT_THAT(sort_order, IsOk());
254-
ASSERT_EQ(sort_order.value()->IsBoundToSchema(*schema_), false);
254+
auto validate_status = sort_order.value()->Validate(*schema_);
255+
EXPECT_THAT(validate_status, IsError(ErrorKind::kInvalidArgument));
256+
EXPECT_THAT(validate_status,
257+
HasErrorMessage("Cannot find source column for sort field:"));
255258
}
256259

257260
} // namespace iceberg

src/iceberg/test/transform_test.cc

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <gtest/gtest.h>
2828

2929
#include "iceberg/expression/literal.h"
30+
#include "iceberg/schema_field.h"
3031
#include "iceberg/test/matchers.h"
3132
#include "iceberg/test/temporal_test_helper.h"
3233
#include "iceberg/type.h"
@@ -841,4 +842,76 @@ TEST(TransformSatisfiesOrderOfTest, SatisfiesOrderOf) {
841842
}
842843
}
843844

845+
TEST(TransformCanTransformTest, CanTransform) {
846+
struct Case {
847+
std::string transform_str;
848+
std::shared_ptr<Type> source_type;
849+
bool expected;
850+
};
851+
852+
const std::vector<Case> cases = {
853+
// Identity can transform all primitive types
854+
{.transform_str = "identity", .source_type = int32(), .expected = true},
855+
{.transform_str = "identity", .source_type = string(), .expected = true},
856+
{.transform_str = "identity", .source_type = boolean(), .expected = true},
857+
{.transform_str = "identity",
858+
.source_type = list(SchemaField(123, "element", int32(), false)),
859+
.expected = false},
860+
861+
// Void can transform any type
862+
{.transform_str = "void", .source_type = iceberg::int32(), .expected = true},
863+
{.transform_str = "void",
864+
.source_type = iceberg::map(SchemaField(123, "key", iceberg::string(), false),
865+
SchemaField(124, "value", iceberg::int32(), true)),
866+
.expected = true},
867+
868+
// Bucket can transform specific types
869+
{.transform_str = "bucket[16]", .source_type = iceberg::int32(), .expected = true},
870+
{.transform_str = "bucket[16]", .source_type = iceberg::string(), .expected = true},
871+
{.transform_str = "bucket[16]",
872+
.source_type = iceberg::float32(),
873+
.expected = false},
874+
875+
// Truncate can transform specific types
876+
{.transform_str = "truncate[32]",
877+
.source_type = iceberg::int32(),
878+
.expected = true},
879+
{.transform_str = "truncate[32]",
880+
.source_type = iceberg::string(),
881+
.expected = true},
882+
{.transform_str = "truncate[32]",
883+
.source_type = iceberg::boolean(),
884+
.expected = false},
885+
886+
// Year can transform date and timestamp types
887+
{.transform_str = "year", .source_type = iceberg::date(), .expected = true},
888+
{.transform_str = "year", .source_type = iceberg::timestamp(), .expected = true},
889+
{.transform_str = "year", .source_type = iceberg::string(), .expected = false},
890+
891+
// Month can transform date and timestamp types
892+
{.transform_str = "month", .source_type = iceberg::date(), .expected = true},
893+
{.transform_str = "month", .source_type = iceberg::timestamp(), .expected = true},
894+
{.transform_str = "month", .source_type = iceberg::binary(), .expected = false},
895+
896+
// Day can transform date and timestamp types
897+
{.transform_str = "day", .source_type = iceberg::date(), .expected = true},
898+
{.transform_str = "day", .source_type = iceberg::timestamp(), .expected = true},
899+
{.transform_str = "day", .source_type = iceberg::uuid(), .expected = false},
900+
901+
// Hour can transform timestamp types
902+
{.transform_str = "hour", .source_type = iceberg::timestamp(), .expected = true},
903+
{.transform_str = "hour", .source_type = iceberg::timestamp_tz(), .expected = true},
904+
{.transform_str = "hour", .source_type = iceberg::int32(), .expected = false},
905+
};
906+
907+
for (const auto& c : cases) {
908+
auto transform = TransformFromString(c.transform_str);
909+
ASSERT_TRUE(transform.has_value()) << "Failed to parse: " << c.transform_str;
910+
911+
EXPECT_EQ(transform.value()->CanTransform(*c.source_type), c.expected)
912+
<< "Unexpected result for transform: " << c.transform_str
913+
<< " and source type: " << c.source_type->ToString();
914+
}
915+
}
916+
844917
} // namespace iceberg

src/iceberg/transform.cc

Lines changed: 21 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -126,21 +126,18 @@ Result<std::shared_ptr<TransformFunction>> Transform::Bind(
126126
}
127127
}
128128

129-
Result<std::shared_ptr<Type>> Transform::ResultType(
130-
const std::shared_ptr<Type>& source_type) const {
129+
bool Transform::CanTransform(const Type& source_type) const {
131130
switch (transform_type_) {
132131
case TransformType::kIdentity:
133-
if (!source_type->is_primitive()) [[unlikely]] {
134-
return InvalidArgument("{} is not a valid input type of identity transform",
135-
source_type->ToString());
132+
if (!source_type.is_primitive()) [[unlikely]] {
133+
return false;
136134
}
137-
return source_type;
135+
return true;
138136
case TransformType::kVoid:
139-
return source_type;
140137
case TransformType::kUnknown:
141-
return string();
138+
return true;
142139
case TransformType::kBucket:
143-
switch (source_type->type_id()) {
140+
switch (source_type.type_id()) {
144141
case TypeId::kInt:
145142
case TypeId::kLong:
146143
case TypeId::kDecimal:
@@ -152,56 +149,50 @@ Result<std::shared_ptr<Type>> Transform::ResultType(
152149
case TypeId::kUuid:
153150
case TypeId::kFixed:
154151
case TypeId::kBinary:
155-
return int32();
152+
return true;
156153
default:
157-
return InvalidArgument("{} is not a valid input type of bucket transform",
158-
source_type->ToString());
154+
return false;
159155
}
160156
case TransformType::kTruncate:
161-
switch (source_type->type_id()) {
157+
switch (source_type.type_id()) {
162158
case TypeId::kInt:
163159
case TypeId::kLong:
164160
case TypeId::kString:
165161
case TypeId::kBinary:
166162
case TypeId::kDecimal:
167-
return source_type;
163+
return true;
168164
default:
169-
return InvalidArgument("{} is not a valid input type of truncate transform",
170-
source_type->ToString());
165+
return false;
171166
}
172167
case TransformType::kYear:
173168
case TransformType::kMonth:
174-
switch (source_type->type_id()) {
169+
switch (source_type.type_id()) {
175170
case TypeId::kDate:
176171
case TypeId::kTimestamp:
177172
case TypeId::kTimestampTz:
178-
return int32();
173+
return true;
179174
default:
180-
return InvalidArgument("{} is not a valid input type of {} transform",
181-
source_type->ToString(), this->ToString());
175+
return false;
182176
}
183177
case TransformType::kDay:
184-
switch (source_type->type_id()) {
178+
switch (source_type.type_id()) {
185179
case TypeId::kDate:
186180
case TypeId::kTimestamp:
187181
case TypeId::kTimestampTz:
188-
return date();
182+
return true;
189183
default:
190-
return InvalidArgument("{} is not a valid input type of day transform",
191-
source_type->ToString());
184+
return false;
192185
}
193186
case TransformType::kHour:
194-
switch (source_type->type_id()) {
187+
switch (source_type.type_id()) {
195188
case TypeId::kTimestamp:
196189
case TypeId::kTimestampTz:
197-
return int32();
190+
return true;
198191
default:
199-
return InvalidArgument("{} is not a valid input type of hour transform",
200-
source_type->ToString());
192+
return false;
201193
}
202-
default:
203-
std::unreachable();
204194
}
195+
std::unreachable();
205196
}
206197

207198
bool Transform::PreservesOrder() const {

src/iceberg/transform.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,10 @@ class ICEBERG_EXPORT Transform : public util::Formattable {
150150
Result<std::shared_ptr<TransformFunction>> Bind(
151151
const std::shared_ptr<Type>& source_type) const;
152152

153-
/// \brief Returns the Type produced by this transform given a source type.
154-
Result<std::shared_ptr<Type>> ResultType(
155-
const std::shared_ptr<Type>& source_type) const;
153+
/// \brief Checks whether this function can be applied to the given Type.
154+
/// \param source_type The source type to check.
155+
/// \return true if this transform can be applied to the type, false otherwise
156+
bool CanTransform(const Type& source_type) const;
156157

157158
/// \brief Whether the transform preserves the order of values (is monotonic).
158159
bool PreservesOrder() const;

0 commit comments

Comments
 (0)