Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions src/iceberg/partition_spec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,8 @@ Result<std::string> PartitionSpec::PartitionPath(const PartitionValues& data) co
if (i > 0) {
ss << "/";
}
// TODO(zhuo.wang): transform for partition value, will be fixed after transform util
// is ready
std::string partition_value = value.get().ToString();
ICEBERG_ASSIGN_OR_RAISE(auto partition_value,
fields_[i].transform()->ToHumanString(value));
ss << UrlEncoder::Encode(fields_[i].name()) << "="
<< UrlEncoder::Encode(partition_value);
}
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ add_iceberg_test(schema_test
schema_util_test.cc
sort_field_test.cc
sort_order_test.cc
transform_human_string_test.cc
transform_test.cc
type_test.cc)

Expand Down
2 changes: 1 addition & 1 deletion src/iceberg/test/location_provider_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ TEST_F(LocationProviderTest, ObjectStorageWithPartition) {

std::vector<std::string> parts = SplitString(location, '/');
ASSERT_GT(parts.size(), 2);
EXPECT_EQ("data%231=%22val%231%22", parts[parts.size() - 2]);
EXPECT_EQ("data%231=val%231", parts[parts.size() - 2]);
}

TEST_F(LocationProviderTest, ObjectStorageExcludePartitionInPath) {
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/test/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ iceberg_tests = {
'schema_util_test.cc',
'sort_field_test.cc',
'sort_order_test.cc',
'transform_human_string_test.cc',
'transform_test.cc',
'type_test.cc',
),
Expand Down
5 changes: 2 additions & 3 deletions src/iceberg/test/partition_spec_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -458,8 +458,7 @@ TEST(PartitionSpecTest, PartitionPath) {
PartitionValues part_data(
{Literal::Int(123), Literal::String("val2"), Literal::Date(19489)});
ICEBERG_UNWRAP_OR_FAIL(auto path, spec->PartitionPath(part_data));
std::string expected =
"id_partition=123/name_partition=%22val2%22/ts_partition=19489";
std::string expected = "id_partition=123/name_partition=val2/ts_partition=2023-05-12";
EXPECT_EQ(expected, path);
}

Expand All @@ -469,7 +468,7 @@ TEST(PartitionSpecTest, PartitionPath) {
{Literal::Int(123), Literal::String("val#2"), Literal::Date(19489)});
ICEBERG_UNWRAP_OR_FAIL(auto path, spec->PartitionPath(part_data));
std::string expected =
"id_partition=123/name_partition=%22val%232%22/ts_partition=19489";
"id_partition=123/name_partition=val%232/ts_partition=2023-05-12";
EXPECT_EQ(expected, path);
}
}
Expand Down
185 changes: 185 additions & 0 deletions src/iceberg/test/transform_human_string_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include <gtest/gtest.h>

#include "iceberg/test/matchers.h"
#include "iceberg/transform.h"

namespace iceberg {

/// \brief One parameterized case shared by the ToHumanString test suites in this file.
struct HumanStringTestParam {
// Returned by the name generators below, so it becomes the generated test's name.
std::string test_name;
// Type the transforms are bound to; only the timestamp suite reads it, the other
// suites leave it unset.
std::shared_ptr<Type> source_type;
// Input literal of the case.
Literal literal;
// Expected human strings, indexed in step with the fixture's transforms_.
std::vector<std::string> expecteds;
};

/// \brief Fixture for the identity cases; holds the single transform under test.
class IdentityHumanStringTest : public ::testing::TestWithParam<HumanStringTestParam> {
 protected:
  // A one-element vector so the test body can walk transforms and expectations
  // by the same index.
  std::vector<std::shared_ptr<Transform>> transforms_{Transform::Identity()};
};

// Checks that every transform of the fixture renders the case's literal as the
// expected human-readable string.
TEST_P(IdentityHumanStringTest, ToHumanString) {
  const auto& param = GetParam();
  // Transforms and expectations are indexed in parallel below; stop early if a case
  // was declared with the wrong number of expected strings.
  ASSERT_EQ(param.expecteds.size(), transforms_.size());
  // std::size_t matches vector::size(), so the loop condition compares like-signed
  // values.
  for (std::size_t i = 0; i < transforms_.size(); ++i) {
    EXPECT_THAT(transforms_[i]->ToHumanString(param.literal),
                HasValue(::testing::Eq(param.expecteds[i])));
  }
}

// Identity cases. The fixture holds a single Identity transform, so every case lists
// exactly one expected string for its literal.
INSTANTIATE_TEST_SUITE_P(
IdentityHumanStringTestCases, IdentityHumanStringTest,
::testing::Values(
HumanStringTestParam{.test_name = "Null",
.literal = Literal::Null(std::make_shared<IntType>()),
.expecteds{"null"}},
// "AQID" is the Base64 encoding of the bytes {1, 2, 3}.
HumanStringTestParam{.test_name = "Binary",
.literal = Literal::Binary(std::vector<uint8_t>{1, 2, 3}),
.expecteds{"AQID"}},
HumanStringTestParam{.test_name = "Fixed",
.literal = Literal::Fixed(std::vector<uint8_t>{1, 2, 3}),
.expecteds{"AQID"}},
HumanStringTestParam{.test_name = "Date",
.literal = Literal::Date(17501),
.expecteds{"2017-12-01"}},
HumanStringTestParam{.test_name = "Time",
.literal = Literal::Time(36775038194),
.expecteds{"10:12:55.038194"}},
HumanStringTestParam{.test_name = "TimestampWithZone",
.literal = Literal::TimestampTz(1512151975038194),
.expecteds{"2017-12-01T18:12:55.038194+00:00"}},
HumanStringTestParam{.test_name = "TimestampWithoutZone",
.literal = Literal::Timestamp(1512123175038194),
.expecteds{"2017-12-01T10:12:55.038194"}},
HumanStringTestParam{.test_name = "Long",
.literal = Literal::Long(-1234567890000L),
.expecteds{"-1234567890000"}},
// The expected string equals the input: '/' and '=' come back unescaped.
HumanStringTestParam{.test_name = "String",
.literal = Literal::String("a/b/c=d"),
.expecteds{"a/b/c=d"}}),
// Name each generated test after its case's test_name.
[](const ::testing::TestParamInfo<HumanStringTestParam>& info) {
return info.param.test_name;
});

/// \brief Fixture for date cases; the transforms are listed in the order the cases'
/// expected strings are written (year, month, day).
class DateHumanStringTest : public ::testing::TestWithParam<HumanStringTestParam> {
 protected:
  std::vector<std::shared_ptr<Transform>> transforms_ = {
      Transform::Year(),
      Transform::Month(),
      Transform::Day(),
  };
};

// For each transform of the fixture: bind it to a date source type, apply it to the
// case's literal, and check the human-readable rendering of the transformed value.
TEST_P(DateHumanStringTest, ToHumanString) {
  const auto& test_case = GetParam();

  for (uint32_t idx = 0; idx < transforms_.size(); ++idx) {
    const auto& transform = transforms_[idx];
    ICEBERG_UNWRAP_OR_FAIL(auto bound_func,
                           transform->Bind(std::make_shared<DateType>()));
    ICEBERG_UNWRAP_OR_FAIL(auto transformed, bound_func->Transform(test_case.literal));
    EXPECT_THAT(transform->ToHumanString(transformed),
                HasValue(::testing::Eq(test_case.expecteds[idx])));
  }
}

// Date cases. `expecteds` is ordered Year, Month, Day to line up with
// DateHumanStringTest::transforms_.
INSTANTIATE_TEST_SUITE_P(
DateHumanStringTestCases, DateHumanStringTest,
::testing::Values(
HumanStringTestParam{.test_name = "Date",
.literal = Literal::Date(17501),
.expecteds = {"2017", "2017-12", "2017-12-01"}},
// Negative inputs are expected to render as dates before 1970-01-01.
HumanStringTestParam{.test_name = "NegativeDate",
.literal = Literal::Date(-2),
.expecteds = {"1969", "1969-12", "1969-12-30"}},
HumanStringTestParam{.test_name = "DateLowerBound",
.literal = Literal::Date(0),
.expecteds = {"1970", "1970-01", "1970-01-01"}},
HumanStringTestParam{.test_name = "NegativeDateLowerBound",
.literal = Literal::Date(-365),
.expecteds = {"1969", "1969-01", "1969-01-01"}},
HumanStringTestParam{.test_name = "NegativeDateUpperBound",
.literal = Literal::Date(-1),
.expecteds = {"1969", "1969-12", "1969-12-31"}},
// A null literal is expected to render as "null" for every transform.
HumanStringTestParam{.test_name = "Null",
.literal = Literal::Null(std::make_shared<DateType>()),
.expecteds = {"null", "null", "null"}}),
// Name each generated test after its case's test_name.
[](const ::testing::TestParamInfo<HumanStringTestParam>& info) {
return info.param.test_name;
});

/// \brief Fixture for timestamp cases; the transforms are listed in the order the
/// cases' expected strings are written (year, month, day, hour).
class TimestampHumanStringTest : public ::testing::TestWithParam<HumanStringTestParam> {
 protected:
  std::vector<std::shared_ptr<Transform>> transforms_ = {
      Transform::Year(),
      Transform::Month(),
      Transform::Day(),
      Transform::Hour(),
  };
};

// For each transform of the fixture: bind it to the case's source type, apply it to
// the case's literal, and check the human-readable rendering of the transformed value.
TEST_P(TimestampHumanStringTest, ToHumanString) {
  const auto& test_case = GetParam();

  for (uint32_t idx = 0; idx < transforms_.size(); ++idx) {
    const auto& transform = transforms_[idx];
    ICEBERG_UNWRAP_OR_FAIL(auto bound_func, transform->Bind(test_case.source_type));
    ICEBERG_UNWRAP_OR_FAIL(auto transformed, bound_func->Transform(test_case.literal));
    EXPECT_THAT(transform->ToHumanString(transformed),
                HasValue(::testing::Eq(test_case.expecteds[idx])));
  }
}

// Timestamp cases. `expecteds` is ordered Year, Month, Day, Hour to line up with
// TimestampHumanStringTest::transforms_; `source_type` is the type each transform is
// bound to before it is applied.
INSTANTIATE_TEST_SUITE_P(
TimestampHumanStringTestCases, TimestampHumanStringTest,
::testing::Values(
HumanStringTestParam{
.test_name = "Timestamp",
.source_type = std::make_shared<TimestampType>(),
.literal = Literal::Timestamp(1512123175038194),
.expecteds = {"2017", "2017-12", "2017-12-01", "2017-12-01-10"}},
// Negative inputs are expected to render as instants before 1970-01-01-00.
HumanStringTestParam{
.test_name = "NegativeTimestamp",
.source_type = std::make_shared<TimestampType>(),
.literal = Literal::Timestamp(-136024961806),
.expecteds = {"1969", "1969-12", "1969-12-30", "1969-12-30-10"}},
HumanStringTestParam{
.test_name = "TimestampLowerBound",
.source_type = std::make_shared<TimestampType>(),
.literal = Literal::Timestamp(0),
.expecteds = {"1970", "1970-01", "1970-01-01", "1970-01-01-00"}},
HumanStringTestParam{
.test_name = "NegativeTimestampLowerBound",
.source_type = std::make_shared<TimestampType>(),
.literal = Literal::Timestamp(-172800000000),
.expecteds = {"1969", "1969-12", "1969-12-30", "1969-12-30-00"},
},
HumanStringTestParam{
.test_name = "NegativeTimestampUpperBound",
.source_type = std::make_shared<TimestampType>(),
.literal = Literal::Timestamp(-1),
.expecteds = {"1969", "1969-12", "1969-12-31", "1969-12-31-23"}},
HumanStringTestParam{
.test_name = "TimestampTz",
.source_type = std::make_shared<TimestampTzType>(),
.literal = Literal::TimestampTz(1512151975038194),
.expecteds = {"2017", "2017-12", "2017-12-01", "2017-12-01-18"}},
// A null literal is expected to render as "null" for every transform.
HumanStringTestParam{.test_name = "Null",
.source_type = std::make_shared<TimestampType>(),
.literal = Literal::Null(std::make_shared<TimestampType>()),
.expecteds = {"null", "null", "null", "null"}}),
// Name each generated test after its case's test_name.
[](const ::testing::TestParamInfo<HumanStringTestParam>& info) {
return info.param.test_name;
});

} // namespace iceberg
45 changes: 45 additions & 0 deletions src/iceberg/transform.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "iceberg/util/checked_cast.h"
#include "iceberg/util/macros.h"
#include "iceberg/util/projection_util_internal.h"
#include "iceberg/util/transform_util.h"

namespace iceberg {
namespace {
Expand Down Expand Up @@ -366,6 +367,50 @@ Result<std::unique_ptr<UnboundPredicate>> Transform::ProjectStrict(
std::unreachable();
}

// Renders `value` as a human-readable string.
//
// A null literal renders as "null". For the year/month/day/hour transforms the value
// is read as an int32_t and formatted by the matching TransformUtil::Human* helper.
// For every other transform the rendering is chosen from the literal's own type.
//
// NOTE: std::get throws std::bad_variant_access if the literal does not hold the
// alternative a branch reads.
Result<std::string> Transform::ToHumanString(const Literal& value) {
  if (value.IsNull()) {
    return "null";
  }

  // Accessors for the two integer widths the branches below read.
  const auto as_int32 = [&value] { return std::get<int32_t>(value.value()); };
  const auto as_int64 = [&value] { return std::get<int64_t>(value.value()); };

  // Temporal transforms format their value directly.
  switch (transform_type_) {
    case TransformType::kYear:
      return TransformUtil::HumanYear(as_int32());
    case TransformType::kMonth:
      return TransformUtil::HumanMonth(as_int32());
    case TransformType::kDay:
      return TransformUtil::HumanDay(as_int32());
    case TransformType::kHour:
      return TransformUtil::HumanHour(as_int32());
    default:
      break;
  }

  // Any other transform: pick the rendering from the literal's type.
  switch (value.type()->type_id()) {
    case TypeId::kDate:
      return TransformUtil::HumanDay(as_int32());
    case TypeId::kTime:
      return TransformUtil::HumanTime(as_int64());
    case TypeId::kTimestamp:
      return TransformUtil::HumanTimestamp(as_int64());
    case TypeId::kTimestampTz:
      return TransformUtil::HumanTimestampWithZone(as_int64());
    case TypeId::kFixed:
    case TypeId::kBinary: {
      // Raw bytes are rendered through Base64.
      const auto& bytes = std::get<std::vector<uint8_t>>(value.value());
      return TransformUtil::Base64Encode(
          {reinterpret_cast<const char*>(bytes.data()), bytes.size()});
    }
    case TypeId::kDecimal: {
      // The scale comes from the literal's type, not from the stored value.
      const auto& decimal_type = internal::checked_cast<DecimalType&>(*value.type());
      const auto& unscaled = std::get<::iceberg::Decimal>(value.value());
      return unscaled.ToString(decimal_type.scale());
    }
    case TypeId::kString:
      return std::get<std::string>(value.value());
    default:
      return value.ToString();
  }
}

bool TransformFunction::Equals(const TransformFunction& other) const {
return transform_type_ == other.transform_type_ && *source_type_ == *other.source_type_;
}
Expand Down
6 changes: 6 additions & 0 deletions src/iceberg/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,12 @@ class ICEBERG_EXPORT Transform : public util::Formattable {
Result<std::unique_ptr<UnboundPredicate>> ProjectStrict(
std::string_view name, const std::shared_ptr<BoundPredicate>& predicate);

/// \brief Returns a human-readable String representation of a transformed value.
Comment thread
WZhuo marked this conversation as resolved.
Outdated
///
/// \param value The already-transformed literal value to render.
/// \return A human-readable String representation of the value
Comment thread
WZhuo marked this conversation as resolved.
Outdated
Result<std::string> ToHumanString(const Literal& value);

/// \brief Returns a string representation of this transform (e.g., "bucket[16]").
std::string ToString() const override;

Expand Down
Loading