Skip to content

Commit 0f44ce2

Browse files
authored
feat: add utility to print transformed value as human string (#501)
1 parent 8295d50 commit 0f44ce2

File tree

8 files changed

+414
-0
lines changed

8 files changed

+414
-0
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ set(ICEBERG_SOURCES
9595
util/snapshot_util.cc
9696
util/temporal_util.cc
9797
util/timepoint.cc
98+
util/transform_util.cc
9899
util/truncate_util.cc
99100
util/type_util.cc
100101
util/url_encoder.cc

src/iceberg/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ iceberg_sources = files(
116116
'util/snapshot_util.cc',
117117
'util/temporal_util.cc',
118118
'util/timepoint.cc',
119+
'util/transform_util.cc',
119120
'util/truncate_util.cc',
120121
'util/type_util.cc',
121122
'util/url_encoder.cc',

src/iceberg/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ add_iceberg_test(util_test
111111
formatter_test.cc
112112
location_util_test.cc
113113
string_util_test.cc
114+
transform_util_test.cc
114115
truncate_util_test.cc
115116
url_encoder_test.cc
116117
uuid_test.cc

src/iceberg/test/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ iceberg_tests = {
8787
'formatter_test.cc',
8888
'location_util_test.cc',
8989
'string_util_test.cc',
90+
'transform_util_test.cc',
9091
'truncate_util_test.cc',
9192
'url_encoder_test.cc',
9293
'uuid_test.cc',
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/util/transform_util.h"
21+
22+
#include <gtest/gtest.h>
23+
24+
namespace iceberg {
25+
26+
// Checks that HumanYear renders a year ordinal as a zero-padded four-digit year,
// where ordinal 0 corresponds to 1970.
TEST(TransformUtilTest, HumanYear) {
27+
// 0 is the epoch year
EXPECT_EQ("1970", TransformUtil::HumanYear(0));
28+
EXPECT_EQ("1971", TransformUtil::HumanYear(1));
29+
// Negative ordinals are years before 1970
EXPECT_EQ("1969", TransformUtil::HumanYear(-1));
30+
// Years below 1000 are expected with a leading zero
EXPECT_EQ("0999", TransformUtil::HumanYear(999 - 1970));
31+
EXPECT_EQ("2026", TransformUtil::HumanYear(56));
32+
}
33+
34+
// Checks that HumanMonth renders a month ordinal (months relative to January 1970)
// as "YYYY-MM".
TEST(TransformUtilTest, HumanMonth) {
35+
// 0 is January 1970
36+
EXPECT_EQ("1970-01", TransformUtil::HumanMonth(0));
37+
// 1 is February 1970
38+
EXPECT_EQ("1970-02", TransformUtil::HumanMonth(1));
39+
// -1 is December 1969
40+
EXPECT_EQ("1969-12", TransformUtil::HumanMonth(-1));
41+
// -11641 is December 0999 (970 years and 1 month before the epoch)
42+
EXPECT_EQ("0999-12", TransformUtil::HumanMonth(-11641));
43+
// 12 is January 1971
44+
EXPECT_EQ("1971-01", TransformUtil::HumanMonth(12));
45+
// 672 (56 * 12) is January 2026
46+
EXPECT_EQ("2026-01", TransformUtil::HumanMonth(672));
47+
}
48+
49+
// Checks that HumanDay renders a day ordinal (days relative to 1970-01-01)
// as "YYYY-MM-DD".
TEST(TransformUtilTest, HumanDay) {
50+
// 0 is Unix epoch (1970-01-01)
51+
EXPECT_EQ("1970-01-01", TransformUtil::HumanDay(0));
52+
// 1 is 1970-01-02
53+
EXPECT_EQ("1970-01-02", TransformUtil::HumanDay(1));
54+
// -1 is 1969-12-31
55+
EXPECT_EQ("1969-12-31", TransformUtil::HumanDay(-1));
56+
// -354286 is 0999-12-31
57+
EXPECT_EQ("0999-12-31", TransformUtil::HumanDay(-354286));
58+
// 365 is 1971-01-01 (1970 is a non-leap year)
59+
EXPECT_EQ("1971-01-01", TransformUtil::HumanDay(365));
60+
// 20454 is 2026-01-01
61+
EXPECT_EQ("2026-01-01", TransformUtil::HumanDay(20454));
62+
}
63+
64+
// Checks that HumanHour renders an hour ordinal (hours relative to
// 1970-01-01 00:00) as "YYYY-MM-DD-HH".
TEST(TransformUtilTest, HumanHour) {
65+
// 0 is Unix epoch at 00:00
66+
EXPECT_EQ("1970-01-01-00", TransformUtil::HumanHour(0));
67+
// 1 is one hour after the epoch (01:00)
68+
EXPECT_EQ("1970-01-01-01", TransformUtil::HumanHour(1));
69+
// -1 is previous day's last hour
70+
EXPECT_EQ("1969-12-31-23", TransformUtil::HumanHour(-1));
71+
// -8502841 is 0999-12-31 at 23:00
72+
EXPECT_EQ("0999-12-31-23", TransformUtil::HumanHour(-8502841));
73+
// 24 is next day at 00:00
74+
EXPECT_EQ("1970-01-02-00", TransformUtil::HumanHour(24));
75+
// 490896 is 2026-01-01 at 00:00
76+
EXPECT_EQ("2026-01-01-00", TransformUtil::HumanHour(490896));
77+
}
78+
79+
// Checks that HumanTime renders microseconds-from-midnight using the shortest of
// "HH:MM", "HH:MM:SS", "HH:MM:SS.mmm" and "HH:MM:SS.uuuuuu" that loses no precision.
TEST(TransformUtilTest, HumanTime) {
80+
// Midnight: seconds and fraction are both zero, so only hours and minutes are shown
81+
EXPECT_EQ("00:00", TransformUtil::HumanTime(0));
82+
// 1 second after midnight
83+
EXPECT_EQ("00:00:01", TransformUtil::HumanTime(1000000));
84+
// 1.5 seconds after midnight
85+
EXPECT_EQ("00:00:01.500", TransformUtil::HumanTime(1500000));
86+
// 1.001 seconds after midnight
87+
EXPECT_EQ("00:00:01.001", TransformUtil::HumanTime(1001000));
88+
// 1.000001 seconds after midnight
89+
EXPECT_EQ("00:00:01.000001", TransformUtil::HumanTime(1000001));
90+
// 1 hour, 2 minutes, 3 seconds
91+
EXPECT_EQ("01:02:03", TransformUtil::HumanTime(3723000000));
92+
// 23:59:59
93+
EXPECT_EQ("23:59:59", TransformUtil::HumanTime(86399000000));
94+
}
95+
96+
// Checks that HumanTimestamp renders microseconds since the Unix epoch as
// "YYYY-MM-DDTHH:MM:SS" with an optional 3- or 6-digit fractional part.
TEST(TransformUtilTest, HumanTimestamp) {
97+
// Unix epoch
98+
EXPECT_EQ("1970-01-01T00:00:00", TransformUtil::HumanTimestamp(0));
99+
// 1 second after epoch
100+
EXPECT_EQ("1970-01-01T00:00:01", TransformUtil::HumanTimestamp(1000000));
101+
// 1 second before epoch
102+
EXPECT_EQ("1969-12-31T23:59:59", TransformUtil::HumanTimestamp(-1000000));
103+
// 0999-12-31T23:59:59
104+
EXPECT_EQ("0999-12-31T23:59:59", TransformUtil::HumanTimestamp(-30610224001000000L));
105+
// precision with 500 milliseconds
106+
EXPECT_EQ("2026-01-01T00:00:01.500", TransformUtil::HumanTimestamp(1767225601500000L));
107+
// precision with 1 millisecond
108+
EXPECT_EQ("2026-01-01T00:00:01.001", TransformUtil::HumanTimestamp(1767225601001000L));
109+
// precision with 1 microsecond
110+
EXPECT_EQ("2026-01-01T00:00:01.000001",
111+
TransformUtil::HumanTimestamp(1767225601000001L));
112+
}
113+
114+
// Checks that HumanTimestampWithZone renders the same text as the zone-less
// cases above, followed by a "+00:00" offset suffix.
TEST(TransformUtilTest, HumanTimestampWithZone) {
115+
// Unix epoch
116+
EXPECT_EQ("1970-01-01T00:00:00+00:00", TransformUtil::HumanTimestampWithZone(0));
117+
// 1 second after epoch
118+
EXPECT_EQ("1970-01-01T00:00:01+00:00", TransformUtil::HumanTimestampWithZone(1000000));
119+
// 1 second before epoch
120+
EXPECT_EQ("1969-12-31T23:59:59+00:00", TransformUtil::HumanTimestampWithZone(-1000000));
121+
// 0999-12-31T23:59:59
122+
EXPECT_EQ("0999-12-31T23:59:59+00:00",
123+
TransformUtil::HumanTimestampWithZone(-30610224001000000L));
124+
// precision with 500 milliseconds
125+
EXPECT_EQ("2026-01-01T00:00:01.500+00:00",
126+
TransformUtil::HumanTimestampWithZone(1767225601500000L));
127+
// precision with 1 millisecond
128+
EXPECT_EQ("2026-01-01T00:00:01.001+00:00",
129+
TransformUtil::HumanTimestampWithZone(1767225601001000L));
130+
// precision with 1 microsecond
131+
EXPECT_EQ("2026-01-01T00:00:01.000001+00:00",
132+
TransformUtil::HumanTimestampWithZone(1767225601000001L));
133+
}
134+
135+
// Checks Base64Encode against known encodings, covering every padding length
// (none, "=", "==") as well as non-ASCII and embedded NUL bytes.
TEST(TransformUtilTest, Base64Encode) {
136+
// Empty string
137+
EXPECT_EQ("", TransformUtil::Base64Encode(""));
138+
139+
// One, two and three bytes: two, one and zero padding characters respectively
140+
EXPECT_EQ("YQ==", TransformUtil::Base64Encode("a"));
141+
EXPECT_EQ("YWI=", TransformUtil::Base64Encode("ab"));
142+
EXPECT_EQ("YWJj", TransformUtil::Base64Encode("abc"));
143+
144+
// More than one group: 5 bytes (padded tail) and 6 bytes (exact multiple of 3)
145+
EXPECT_EQ("YWJjZGU=", TransformUtil::Base64Encode("abcde"));
146+
EXPECT_EQ("YWJjZGVm", TransformUtil::Base64Encode("abcdef"));
147+
148+
// Common strings
149+
EXPECT_EQ("U29tZSBkYXRhIHdpdGggY2hhcmFjdGVycw==",
150+
TransformUtil::Base64Encode("Some data with characters"));
151+
EXPECT_EQ("aGVsbG8=", TransformUtil::Base64Encode("hello"));
152+
EXPECT_EQ("dGVzdCBzdHJpbmc=", TransformUtil::Base64Encode("test string"));
153+
154+
// Unicode: a 4-byte sequence whose bytes are all >= 0x80
155+
EXPECT_EQ("8J+EgA==", TransformUtil::Base64Encode("\xF0\x9F\x84\x80"));
156+
// Null byte: the explicit length of 1 keeps the NUL as part of the input
157+
EXPECT_EQ("AA==", TransformUtil::Base64Encode({"\x00", 1}));
158+
}
159+
160+
} // namespace iceberg

src/iceberg/util/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ install_headers(
3636
'string_util.h',
3737
'temporal_util.h',
3838
'timepoint.h',
39+
'transform_util.h',
3940
'truncate_util.h',
4041
'type_util.h',
4142
'url_encoder.h',

src/iceberg/util/transform_util.cc

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/util/transform_util.h"
21+
22+
#include <array>
23+
#include <chrono>
24+
25+
namespace iceberg {
26+
27+
// File-local constants shared by the TransformUtil formatting helpers below.
namespace {
28+
// Calendar date 1970-01-01; year and month ordinals are applied as offsets to it.
constexpr auto kEpochDate = std::chrono::year{1970} / std::chrono::January / 1;
29+
// Number of microseconds in one millisecond.
constexpr int64_t kMicrosPerMillis = 1'000;
30+
// Number of microseconds in one second.
constexpr int64_t kMicrosPerSecond = 1'000'000;
31+
} // namespace
32+
33+
// Formats a year ordinal as a calendar year using the "%Y" chrono format.
//
// `year_ordinal` is added, as a number of years, to 1970-01-01; e.g. 0 gives
// "1970" and -1 gives "1969".
std::string TransformUtil::HumanYear(int32_t year_ordinal) {
  const std::chrono::years offset{year_ordinal};
  return std::format("{:%Y}", kEpochDate + offset);
}
37+
38+
// Formats a month ordinal as "YYYY-MM".
//
// `month_ordinal` is added, as a number of months, to 1970-01-01; e.g. 0 gives
// "1970-01" and -1 gives "1969-12".
std::string TransformUtil::HumanMonth(int32_t month_ordinal) {
  const std::chrono::months offset{month_ordinal};
  return std::format("{:%Y-%m}", kEpochDate + offset);
}
42+
43+
// Formats a day ordinal as an ISO date ("%F", i.e. "YYYY-MM-DD").
//
// `day_ordinal` is a number of days relative to 1970-01-01; e.g. 0 gives
// "1970-01-01" and -1 gives "1969-12-31".
std::string TransformUtil::HumanDay(int32_t day_ordinal) {
  const std::chrono::sys_days epoch_day{kEpochDate};
  return std::format("{:%F}", epoch_day + std::chrono::days{day_ordinal});
}
47+
48+
// Formats an hour ordinal as "YYYY-MM-DD-HH".
//
// `hour_ordinal` is interpreted as hours since the system_clock epoch; e.g. 0
// gives "1970-01-01-00" and -1 gives "1969-12-31-23".
std::string TransformUtil::HumanHour(int32_t hour_ordinal) {
  // A system_clock time point with hour resolution.
  using HourPoint = std::chrono::sys_time<std::chrono::hours>;
  const HourPoint hour_point{std::chrono::hours{hour_ordinal}};
  return std::format("{:%F-%H}", hour_point);
}
53+
54+
// Formats a time of day, given as microseconds from midnight, using the
// shortest representation that keeps all non-zero components:
//   - "HH:MM"            when the seconds field and the fraction are both zero
//   - "HH:MM:SS"         when only the fraction is zero
//   - "HH:MM:SS.mmm"     when the fraction is a whole number of milliseconds
//   - "HH:MM:SS.uuuuuu"  otherwise
//
// NOTE(review): the value is presumably expected to be non-negative; negative
// inputs are not handled specially here.
std::string TransformUtil::HumanTime(int64_t micros_from_midnight) {
  const int64_t whole_seconds = micros_from_midnight / kMicrosPerSecond;
  const int64_t fraction_micros = micros_from_midnight % kMicrosPerSecond;
  const std::chrono::hh_mm_ss<std::chrono::seconds> time_of_day{
      std::chrono::seconds{whole_seconds}};

  if (fraction_micros != 0) {
    // Prefer millisecond precision when no finer detail would be lost.
    if (fraction_micros % kMicrosPerMillis == 0) {
      return std::format("{:%T}.{:03d}", time_of_day,
                         fraction_micros / kMicrosPerMillis);
    }
    return std::format("{:%T}.{:06d}", time_of_day, fraction_micros);
  }

  if (time_of_day.seconds().count() == 0) {
    return std::format("{:%R}", time_of_day);
  }
  return std::format("{:%T}", time_of_day);
}
68+
69+
// Formats a timestamp, given as microseconds since the Unix epoch, as
// "YYYY-MM-DDTHH:MM:SS" followed by an optional fractional part:
//   - no fraction       when the sub-second part is zero
//   - ".mmm"            when it is a whole number of milliseconds
//   - ".uuuuuu"         otherwise
//
// Negative timestamps (instants before the epoch) are split with floor
// semantics so the fractional part is always in [0, 1s). For example, -1
// renders as "1969-12-31T23:59:59.999999".
std::string TransformUtil::HumanTimestamp(int64_t timestamp_micros) {
  int64_t seconds = timestamp_micros / kMicrosPerSecond;
  int64_t micros = timestamp_micros % kMicrosPerSecond;
  // C++ integer division truncates toward zero, which leaves a negative
  // remainder for pre-epoch instants; borrow one second to normalize it.
  if (micros < 0) {
    micros += kMicrosPerSecond;
    --seconds;
  }

  const std::chrono::sys_seconds tp{std::chrono::seconds{seconds}};
  if (micros == 0) {
    return std::format("{:%FT%T}", tp);
  }
  if (micros % kMicrosPerMillis == 0) {
    return std::format("{:%FT%T}.{:03d}", tp, micros / kMicrosPerMillis);
  }
  return std::format("{:%FT%T}.{:06d}", tp, micros);
}
81+
82+
// Formats a timestamp, given as microseconds since the Unix epoch, as
// "YYYY-MM-DDTHH:MM:SS[.fraction]+00:00". The fraction is omitted when zero,
// printed as 3 digits when it is a whole number of milliseconds, and as 6
// digits otherwise. The offset suffix is always the literal "+00:00".
//
// Negative timestamps (instants before the epoch) are split with floor
// semantics so the fractional part is always in [0, 1s). For example, -1
// renders as "1969-12-31T23:59:59.999999+00:00".
std::string TransformUtil::HumanTimestampWithZone(int64_t timestamp_micros) {
  int64_t seconds = timestamp_micros / kMicrosPerSecond;
  int64_t micros = timestamp_micros % kMicrosPerSecond;
  // C++ integer division truncates toward zero, which leaves a negative
  // remainder for pre-epoch instants; borrow one second to normalize it.
  if (micros < 0) {
    micros += kMicrosPerSecond;
    --seconds;
  }

  const std::chrono::sys_seconds tp{std::chrono::seconds{seconds}};
  if (micros == 0) {
    return std::format("{:%FT%T}+00:00", tp);
  }
  if (micros % kMicrosPerMillis == 0) {
    return std::format("{:%FT%T}.{:03d}+00:00", tp, micros / kMicrosPerMillis);
  }
  return std::format("{:%FT%T}.{:06d}+00:00", tp, micros);
}
94+
95+
// Encodes the bytes of `str_to_encode` as Base64 text using the alphabet
// "A-Z a-z 0-9 + /" and '=' padding, so the result length is always a
// multiple of four. An empty input yields an empty string.
std::string TransformUtil::Base64Encode(std::string_view str_to_encode) {
  static constexpr std::string_view kAlphabet =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

  std::string result;
  result.reserve((str_to_encode.size() + 2) * 4 / 3);

  // Reads one input byte as an unsigned value so bytes >= 0x80 shift correctly.
  const auto byte_at = [&](std::size_t index) {
    return static_cast<unsigned char>(str_to_encode[index]);
  };

  const std::size_t total = str_to_encode.size();
  std::size_t pos = 0;

  // Every complete 3-byte group maps to exactly four output characters.
  for (; pos + 3 <= total; pos += 3) {
    const unsigned char first = byte_at(pos);
    const unsigned char second = byte_at(pos + 1);
    const unsigned char third = byte_at(pos + 2);
    result.push_back(kAlphabet[(first & 0xfc) >> 2]);
    result.push_back(kAlphabet[((first & 0x03) << 4) + ((second & 0xf0) >> 4)]);
    result.push_back(kAlphabet[((second & 0x0f) << 2) + ((third & 0xc0) >> 6)]);
    result.push_back(kAlphabet[third & 0x3f]);
  }

  // A trailing group of 1 or 2 bytes is treated as zero-filled: it emits
  // (leftover + 1) alphabet characters and is then padded with '=' to four.
  const std::size_t leftover = total - pos;
  if (leftover != 0) {
    const unsigned char first = byte_at(pos);
    const unsigned char second = (leftover == 2) ? byte_at(pos + 1) : 0;
    result.push_back(kAlphabet[(first & 0xfc) >> 2]);
    result.push_back(kAlphabet[((first & 0x03) << 4) + ((second & 0xf0) >> 4)]);
    if (leftover == 2) {
      result.push_back(kAlphabet[(second & 0x0f) << 2]);
    }
    result.append(3 - leftover, '=');
  }

  return result;
}
143+
144+
} // namespace iceberg

0 commit comments

Comments
 (0)