Skip to content

Commit b98cdd4

Browse files
committed
feat: Implement Type Casting, toString for Literals
1 parent 88f5520 commit b98cdd4

File tree

3 files changed

+484
-18
lines changed

3 files changed

+484
-18
lines changed

src/iceberg/expression/literal.cc

Lines changed: 212 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,22 @@
2222
#include <cmath>
2323
#include <concepts>
2424

25+
#include <iceberg/result.h>
26+
2527
#include "iceberg/exception.h"
2628

2729
namespace iceberg {
2830

31+
namespace {

/// \brief Number of microseconds in one day (24 * 60 * 60 * 1000 * 1000).
constexpr int64_t kMicrosPerDay = 86400000000LL;

/// \brief Convert a microsecond count into a whole-day count, rounding toward
/// negative infinity.
///
/// Only integer arithmetic is used. Routing the value through double loses
/// precision once |micros| exceeds 2^53, which can push a value that lies just
/// before a day boundary onto the next day.
///
/// \param micros Microsecond count; may be negative.
/// \return floor(micros / kMicrosPerDay).
int32_t MicrosToDays(int64_t micros) {
  int64_t days = micros / kMicrosPerDay;
  // Integer division truncates toward zero; step down by one when a negative
  // input leaves a non-zero remainder so the result is a true floor.
  if (micros % kMicrosPerDay < 0) {
    --days;
  }
  // For any int64_t input |days| stays below 1.1e8, so narrowing is lossless.
  return static_cast<int32_t>(days);
}

}  // namespace
40+
2941
/// \brief LiteralCaster handles type casting operations for Literal.
3042
/// This is an internal implementation class.
3143
class LiteralCaster {
@@ -52,6 +64,30 @@ class LiteralCaster {
5264
/// Cast from Float type to target type.
5365
static Result<Literal> CastFromFloat(const Literal& literal,
5466
const std::shared_ptr<PrimitiveType>& target_type);
67+
68+
/// Cast from Double type to target type.
69+
static Result<Literal> CastFromDouble(
70+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type);
71+
72+
/// Cast from String type to target type.
73+
static Result<Literal> CastFromString(
74+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type);
75+
76+
/// Cast from Timestamp type to target type.
77+
static Result<Literal> CastFromTimestamp(
78+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type);
79+
80+
/// Cast from TimestampTz type to target type.
81+
static Result<Literal> CastFromTimestampTz(
82+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type);
83+
84+
/// Cast from Binary type to target type.
85+
static Result<Literal> CastFromBinary(
86+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type);
87+
88+
/// Cast from Fixed type to target type.
89+
static Result<Literal> CastFromFixed(const Literal& literal,
90+
const std::shared_ptr<PrimitiveType>& target_type);
5591
};
5692

5793
Literal LiteralCaster::BelowMinLiteral(std::shared_ptr<PrimitiveType> type) {
@@ -74,6 +110,8 @@ Result<Literal> LiteralCaster::CastFromInt(
74110
return Literal::Float(static_cast<float>(int_val));
75111
case TypeId::kDouble:
76112
return Literal::Double(static_cast<double>(int_val));
113+
case TypeId::kDate:
114+
return Literal::Date(int_val);
77115
default:
78116
return NotSupported("Cast from Int to {} is not implemented",
79117
target_type->ToString());
@@ -83,15 +121,14 @@ Result<Literal> LiteralCaster::CastFromInt(
83121
Result<Literal> LiteralCaster::CastFromLong(
84122
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
85123
auto long_val = std::get<int64_t>(literal.value_);
86-
auto target_type_id = target_type->type_id();
87124

88-
switch (target_type_id) {
125+
switch (target_type->type_id()) {
89126
case TypeId::kInt: {
90127
// Check for overflow
91-
if (long_val >= std::numeric_limits<int32_t>::max()) {
128+
if (long_val > std::numeric_limits<int32_t>::max()) {
92129
return AboveMaxLiteral(target_type);
93130
}
94-
if (long_val <= std::numeric_limits<int32_t>::min()) {
131+
if (long_val < std::numeric_limits<int32_t>::min()) {
95132
return BelowMinLiteral(target_type);
96133
}
97134
return Literal::Int(static_cast<int32_t>(long_val));
@@ -100,6 +137,21 @@ Result<Literal> LiteralCaster::CastFromLong(
100137
return Literal::Float(static_cast<float>(long_val));
101138
case TypeId::kDouble:
102139
return Literal::Double(static_cast<double>(long_val));
140+
case TypeId::kDate: {
141+
if (long_val > static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
142+
return AboveMaxLiteral(target_type);
143+
}
144+
if (long_val < static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
145+
return BelowMinLiteral(target_type);
146+
}
147+
return Literal::Date(static_cast<int32_t>(long_val));
148+
}
149+
case TypeId::kTime:
150+
return Literal::Time(long_val);
151+
case TypeId::kTimestamp:
152+
return Literal::Timestamp(long_val);
153+
case TypeId::kTimestampTz:
154+
return Literal::TimestampTz(long_val);
103155
default:
104156
return NotSupported("Cast from Long to {} is not supported",
105157
target_type->ToString());
@@ -109,9 +161,8 @@ Result<Literal> LiteralCaster::CastFromLong(
109161
Result<Literal> LiteralCaster::CastFromFloat(
110162
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
111163
auto float_val = std::get<float>(literal.value_);
112-
auto target_type_id = target_type->type_id();
113164

114-
switch (target_type_id) {
165+
switch (target_type->type_id()) {
115166
case TypeId::kDouble:
116167
return Literal::Double(static_cast<double>(float_val));
117168
default:
@@ -120,6 +171,132 @@ Result<Literal> LiteralCaster::CastFromFloat(
120171
}
121172
}
122173

174+
/// Cast a Double literal to the requested target type.
///
/// Float is the only target handled here. A value greater than the largest
/// finite float yields AboveMaxLiteral, a value less than the negated largest
/// finite float yields BelowMinLiteral, and anything else is narrowed with
/// static_cast. Every other target type produces a NotSupported error.
Result<Literal> LiteralCaster::CastFromDouble(
    const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
  const double source = std::get<double>(literal.value_);

  if (target_type->type_id() != TypeId::kFloat) {
    return NotSupported("Cast from Double to {} is not supported",
                        target_type->ToString());
  }

  constexpr double kFloatLimit = static_cast<double>(std::numeric_limits<float>::max());
  if (source > kFloatLimit) {
    return AboveMaxLiteral(target_type);
  }
  if (source < -kFloatLimit) {
    return BelowMinLiteral(target_type);
  }
  return Literal::Float(static_cast<float>(source));
}
193+
194+
/// Cast a String literal to the requested target type.
///
/// No conversion is performed yet: the four temporal targets report
/// NotImplemented and every other target reports NotSupported.
Result<Literal> LiteralCaster::CastFromString(
    const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
  // TODO(Li Feiyang): Implement parsing for the temporal targets below using
  // std::chrono::parse once it becomes available in the target libc++.
  // Intended text formats:
  //   Date:        "YYYY-MM-DD"
  //   Time:        "HH:MM:SS.ffffff"
  //   Timestamp:   "YYYY-MM-DDTHH:MM:SS.ffffff"
  //   TimestampTz: "YYYY-MM-DDTHH:MM:SS.ffffffZ"
  const auto target_id = target_type->type_id();

  if (target_id == TypeId::kDate) {
    return NotImplemented("Cast from String to Date is not yet implemented.");
  }
  if (target_id == TypeId::kTime) {
    return NotImplemented("Cast from String to Time is not yet implemented.");
  }
  if (target_id == TypeId::kTimestamp) {
    return NotImplemented("Cast from String to Timestamp is not yet implemented.");
  }
  if (target_id == TypeId::kTimestampTz) {
    return NotImplemented("Cast from String to TimestampTz is not yet implemented.");
  }

  return NotSupported("Cast from String to {} is not supported",
                      target_type->ToString());
}
226+
227+
/// Cast a Timestamp literal to the requested target type.
///
/// Date: the stored 64-bit value is reduced to days via MicrosToDays.
/// TimestampTz: the stored 64-bit value is passed through unchanged.
/// Any other target produces a NotSupported error.
Result<Literal> LiteralCaster::CastFromTimestamp(
    const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
  const auto micros = std::get<int64_t>(literal.value_);
  const auto target_id = target_type->type_id();

  if (target_id == TypeId::kDate) {
    return Literal::Date(MicrosToDays(micros));
  }
  if (target_id == TypeId::kTimestampTz) {
    return Literal::TimestampTz(micros);
  }
  return NotSupported("Cast from Timestamp to {} is not supported",
                      target_type->ToString());
}
241+
242+
/// Cast a TimestampTz literal to the requested target type.
///
/// Date: the stored 64-bit value is reduced to days via MicrosToDays.
/// Timestamp: the stored 64-bit value is passed through unchanged.
/// Any other target produces a NotSupported error.
Result<Literal> LiteralCaster::CastFromTimestampTz(
    const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
  const auto stored_value = std::get<int64_t>(literal.value_);
  const auto target_id = target_type->type_id();

  if (target_id == TypeId::kDate) {
    return Literal::Date(MicrosToDays(stored_value));
  }
  if (target_id == TypeId::kTimestamp) {
    return Literal::Timestamp(stored_value);
  }
  return NotSupported("Cast from TimestampTz to {} is not supported",
                      target_type->ToString());
}
258+
259+
/// Cast a Binary literal to the requested target type.
///
/// Fixed is the only target handled here, and only when the byte count equals
/// the target's declared length; a length mismatch or any other target type
/// produces a NotSupported error.
Result<Literal> LiteralCaster::CastFromBinary(
    const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
  // Bind by reference so the byte buffer is copied only when a Fixed literal is
  // actually produced, never on the error paths.
  const auto& binary_val = std::get<std::vector<uint8_t>>(literal.value_);
  switch (target_type->type_id()) {
    case TypeId::kFixed: {
      auto target_fixed_type = std::dynamic_pointer_cast<FixedType>(target_type);
      if (binary_val.size() == target_fixed_type->length()) {
        return Literal::Fixed(binary_val);
      }
      return NotSupported("Cannot cast Binary with length {} to Fixed({})",
                          binary_val.size(), target_fixed_type->length());
    }
    default:
      return NotSupported("Cast from Binary to {} is not supported",
                          target_type->ToString());
  }
}
276+
277+
/// Cast a Fixed literal to the requested target type.
///
/// Binary: a Binary literal is built from the same bytes.
/// Fixed: allowed only when the byte count equals the target's declared
/// length, in which case the input literal is returned as is.
/// Any other target, or a length mismatch, produces a NotSupported error.
Result<Literal> LiteralCaster::CastFromFixed(
    const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
  const auto& bytes = std::get<std::vector<uint8_t>>(literal.value_);
  const auto target_id = target_type->type_id();

  if (target_id == TypeId::kBinary) {
    return Literal::Binary(bytes);
  }

  if (target_id == TypeId::kFixed) {
    auto fixed_target = std::dynamic_pointer_cast<FixedType>(target_type);
    if (bytes.size() != fixed_target->length()) {
      return NotSupported("Cannot cast Fixed({}) to Fixed({}) due to mismatched lengths",
                          bytes.size(), fixed_target->length());
    }
    // Lengths match, so this is effectively a same-type cast: return the
    // literal itself.
    return literal;
  }

  return NotSupported("Cast from Fixed to {} is not supported",
                      target_type->ToString());
}
299+
123300
// Constructor
124301
Literal::Literal(Value value, std::shared_ptr<PrimitiveType> type)
125302
: value_(std::move(value)), type_(std::move(type)) {}
@@ -149,6 +326,10 @@ Literal Literal::Binary(std::vector<uint8_t> value) {
149326
return {Value{std::move(value)}, binary()};
150327
}
151328

329+
/// \brief Create a Fixed literal whose type length is the number of bytes in
/// the given buffer.
///
/// \param value Bytes of the literal; moved into the result.
Literal Literal::Fixed(std::vector<uint8_t> value) {
  // Capture the length before the move: the elements of a braced-init-list are
  // evaluated left to right, so calling value.size() after std::move(value)
  // would read a moved-from vector and build the type with the wrong length.
  const auto length = value.size();
  return {Value{std::move(value)}, fixed(length)};
}
332+
152333
Result<Literal> Literal::Deserialize(std::span<const uint8_t> data,
153334
std::shared_ptr<PrimitiveType> type) {
154335
return NotImplemented("Deserialization of Literal is not implemented yet");
@@ -216,6 +397,7 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
216397
}
217398

218399
case TypeId::kLong:
400+
case TypeId::kTime:
219401
case TypeId::kTimestamp:
220402
case TypeId::kTimestampTz: {
221403
auto this_val = std::get<int64_t>(value_);
@@ -243,7 +425,8 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
243425
return this_val <=> other_val;
244426
}
245427

246-
case TypeId::kBinary: {
428+
case TypeId::kBinary:
429+
case TypeId::kFixed: {
247430
auto& this_val = std::get<std::vector<uint8_t>>(value_);
248431
auto& other_val = std::get<std::vector<uint8_t>>(other.value_);
249432
return this_val <=> other_val;
@@ -287,20 +470,26 @@ std::string Literal::ToString() const {
287470
}
288471
case TypeId::kBinary: {
289472
const auto& binary_data = std::get<std::vector<uint8_t>>(value_);
290-
std::string result;
291-
result.reserve(binary_data.size() * 2); // 2 chars per byte
473+
std::string result = "X'";
474+
result.reserve(2 + binary_data.size() * 2 +
475+
1); // 2 for X' + 2 chars per byte + 1 for '
292476
for (const auto& byte : binary_data) {
293477
std::format_to(std::back_inserter(result), "{:02X}", byte);
294478
}
479+
result.push_back('\'');
295480
return result;
296481
}
297-
case TypeId::kDecimal:
298-
case TypeId::kUuid:
299-
case TypeId::kFixed:
300-
case TypeId::kDate:
301482
case TypeId::kTime:
302483
case TypeId::kTimestamp:
303484
case TypeId::kTimestampTz: {
485+
return std::to_string(std::get<int64_t>(value_));
486+
}
487+
case TypeId::kDate: {
488+
return std::to_string(std::get<int32_t>(value_));
489+
}
490+
case TypeId::kDecimal:
491+
case TypeId::kUuid:
492+
case TypeId::kFixed: {
304493
throw IcebergError("Not implemented: ToString for " + type_->ToString());
305494
}
306495
default: {
@@ -343,10 +532,18 @@ Result<Literal> LiteralCaster::CastTo(const Literal& literal,
343532
case TypeId::kFloat:
344533
return CastFromFloat(literal, target_type);
345534
case TypeId::kDouble:
346-
case TypeId::kBoolean:
535+
return CastFromDouble(literal, target_type);
347536
case TypeId::kString:
537+
return CastFromString(literal, target_type);
348538
case TypeId::kBinary:
349-
break;
539+
return CastFromBinary(literal, target_type);
540+
case TypeId::kFixed:
541+
return CastFromFixed(literal, target_type);
542+
case TypeId::kTimestamp:
543+
return CastFromTimestamp(literal, target_type);
544+
case TypeId::kTimestampTz:
545+
return CastFromTimestampTz(literal, target_type);
546+
case TypeId::kBoolean:
350547
default:
351548
break;
352549
}

src/iceberg/expression/literal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ class ICEBERG_EXPORT Literal {
7171
static Literal Double(double value);
7272
static Literal String(std::string value);
7373
static Literal Binary(std::vector<uint8_t> value);
74+
static Literal Fixed(std::vector<uint8_t> value);
7475

7576
/// \brief Create a literal representing a null value.
7677
static Literal Null(std::shared_ptr<PrimitiveType> type) {

0 commit comments

Comments
 (0)