Skip to content

Commit d8e2630

Browse files
author
Innocent
committed
feat: correctly bind literals after serde
1 parent 0ed2c5e commit d8e2630

File tree

7 files changed

+321
-64
lines changed

7 files changed

+321
-64
lines changed

src/iceberg/expression/json_serde_internal.h

Lines changed: 0 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
#pragma once
2121

22-
2322
#include <nlohmann/json_fwd.hpp>
2423

2524
#include "iceberg/expression/expression.h"
@@ -127,63 +126,6 @@ ICEBERG_EXPORT Result<std::unique_ptr<UnboundPredicate>> UnboundPredicateFromJso
127126
/// \return A JSON value representing the term, or an error
128127
ICEBERG_EXPORT Result<nlohmann::json> ToJson(const Term& term);
129128

130-
/// \brief Deserializes a JSON object into a NamedReference.
131-
///
132-
/// \param json A JSON object representing a named reference
133-
/// \return A unique pointer to the deserialized NamedReference, or an error
134-
ICEBERG_EXPORT Result<std::unique_ptr<NamedReference>> NamedReferenceFromJson(
135-
const nlohmann::json& json);
136-
137-
/// \brief Serializes a NamedReference into its JSON representation.
138-
///
139-
/// \param ref The named reference to serialize
140-
/// \return A JSON object representing the named reference
141-
ICEBERG_EXPORT nlohmann::json ToJson(const NamedReference& ref);
142-
143-
/// \brief Serializes an UnboundTransform into its JSON representation.
144-
///
145-
/// \param transform The unbound transform to serialize
146-
/// \return A JSON object representing the unbound transform
147-
ICEBERG_EXPORT nlohmann::json ToJson(const UnboundTransform& transform);
148-
149-
/// \brief Deserializes a JSON object into an UnboundTransform.
150-
///
151-
/// \param json A JSON object representing an unbound transform
152-
/// \return A unique pointer to the deserialized UnboundTransform, or an error
153-
ICEBERG_EXPORT Result<std::unique_ptr<UnboundTransform>> UnboundTransformFromJson(
154-
const nlohmann::json& json);
155-
156-
/// \brief Serializes a Literal into its JSON representation.
157-
///
158-
/// \param literal The literal to serialize
159-
/// \return A JSON value representing the literal
160-
ICEBERG_EXPORT nlohmann::json ToJson(const Literal& literal);
161-
162-
/// \brief Deserializes a JSON value into a Literal.
163-
///
164-
/// \param json A JSON value representing a literal
165-
/// \return The deserialized Literal or an error
166-
ICEBERG_EXPORT Result<Literal> LiteralFromJson(const nlohmann::json& json);
167-
168-
/// \brief Serializes an UnboundPredicate into its JSON representation.
169-
///
170-
/// \param pred The unbound predicate to serialize
171-
/// \return A JSON object representing the predicate
172-
ICEBERG_EXPORT nlohmann::json ToJson(const UnboundPredicate& pred);
173-
174-
/// \brief Deserializes a JSON object into an UnboundPredicate.
175-
///
176-
/// \param json A JSON object representing an unbound predicate
177-
/// \return A unique pointer to the deserialized UnboundPredicate, or an error
178-
ICEBERG_EXPORT Result<std::unique_ptr<UnboundPredicate>> UnboundPredicateFromJson(
179-
const nlohmann::json& json);
180-
181-
/// \brief Serializes a Term into its JSON representation.
182-
///
183-
/// \param term The term to serialize (NamedReference or UnboundTransform)
184-
/// \return A JSON value representing the term
185-
ICEBERG_EXPORT nlohmann::json TermToJson(const Term& term);
186-
187129
/// Check if an operation is a unary predicate
188130
ICEBERG_EXPORT bool IsUnaryOperation(Expression::Operation op);
189131

src/iceberg/expression/literal.cc

Lines changed: 62 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,46 @@
2323
#include <concepts>
2424
#include <cstdint>
2525
#include <string>
26+
#include <vector>
2627

28+
#include "iceberg/type.h"
2729
#include "iceberg/util/checked_cast.h"
2830
#include "iceberg/util/conversions.h"
31+
#include "iceberg/util/decimal.h"
2932
#include "iceberg/util/macros.h"
3033
#include "iceberg/util/temporal_util.h"
34+
#include "iceberg/util/transform_util.h"
3135

3236
namespace iceberg {
3337

38+
namespace {
39+
/// \brief Decodes a hexadecimal string into raw bytes.
///
/// Each pair of characters becomes one byte, most-significant nibble first. Both
/// upper- and lower-case digits are accepted. An empty string decodes to an empty
/// vector.
///
/// \param hex The hex text to decode; must contain an even number of characters
/// \return The decoded bytes, or InvalidArgument if the length is odd or any
/// character is not a hex digit
Result<std::vector<uint8_t>> HexStringToBytes(std::string_view hex) {
  if (hex.length() % 2 != 0) {
    return InvalidArgument("Hex string must have an even length");
  }

  // Returns the numeric value of a hex digit, or -1 when `c` is not one. A sentinel
  // is used instead of throwing so that malformed input is reported through Result
  // without relying on exceptions for control flow.
  auto to_nibble = [](char c) -> int {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
  };

  std::vector<uint8_t> bytes;
  bytes.reserve(hex.length() / 2);

  for (size_t i = 0; i < hex.length(); i += 2) {
    const int high = to_nibble(hex[i]);
    const int low = to_nibble(hex[i + 1]);
    if (high < 0 || low < 0) {
      // Message text is kept identical to what the previous throw/catch produced.
      return InvalidArgument("Invalid hex character in string: Invalid hex character");
    }
    bytes.push_back(static_cast<uint8_t>((high << 4) | low));
  }
  return bytes;
}
64+
} // namespace
65+
3466
/// \brief LiteralCaster handles type casting operations for Literal.
3567
/// This is an internal implementation class.
3668
class LiteralCaster {
@@ -193,12 +225,36 @@ Result<Literal> LiteralCaster::CastFromString(
193225
ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromString(str_val));
194226
return Literal::UUID(uuid);
195227
}
196-
case TypeId::kDate:
197-
case TypeId::kTime:
198-
case TypeId::kTimestamp:
199-
case TypeId::kTimestampTz:
200-
return NotImplemented("Cast from String to {} is not implemented yet",
201-
target_type->ToString());
228+
case TypeId::kDate: {
229+
ICEBERG_ASSIGN_OR_RAISE(auto days, TransformUtil::ParseDay(str_val));
230+
return Literal::Date(days);
231+
}
232+
case TypeId::kTime: {
233+
ICEBERG_ASSIGN_OR_RAISE(auto micros, TransformUtil::ParseTime(str_val));
234+
return Literal::Time(micros);
235+
}
236+
case TypeId::kTimestamp: {
237+
ICEBERG_ASSIGN_OR_RAISE(auto micros, TransformUtil::ParseTimestamp(str_val));
238+
return Literal::Timestamp(micros);
239+
}
240+
case TypeId::kTimestampTz: {
241+
ICEBERG_ASSIGN_OR_RAISE(auto micros,
242+
TransformUtil::ParseTimestampWithZone(str_val));
243+
return Literal::TimestampTz(micros);
244+
}
245+
case TypeId::kBinary: {
246+
ICEBERG_ASSIGN_OR_RAISE(auto bytes, HexStringToBytes(str_val));
247+
return Literal::Binary(std::move(bytes));
248+
}
249+
case TypeId::kFixed: {
250+
ICEBERG_ASSIGN_OR_RAISE(auto bytes, HexStringToBytes(str_val));
251+
return Literal::Fixed(std::move(bytes));
252+
}
253+
case TypeId::kDecimal: {
254+
const auto& dec_type = internal::checked_cast<const DecimalType&>(*target_type);
255+
ICEBERG_ASSIGN_OR_RAISE(auto dec, Decimal::FromString(str_val));
256+
return Literal::Decimal(dec.value(), dec_type.precision(), dec_type.scale());
257+
}
202258
default:
203259
return NotSupported("Cast from String to {} is not supported",
204260
target_type->ToString());

src/iceberg/test/expression_json_test.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,10 @@
2626
#include <nlohmann/json.hpp>
2727

2828
#include "iceberg/expression/expression.h"
29+
#include "iceberg/expression/expressions.h"
2930
#include "iceberg/expression/json_serde_internal.h"
3031
#include "iceberg/expression/literal.h"
32+
#include "iceberg/expression/predicate.h"
3133
#include "iceberg/expression/term.h"
3234
#include "iceberg/schema.h"
3335
#include "iceberg/test/matchers.h"

src/iceberg/test/literal_test.cc

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,37 @@ INSTANTIATE_TEST_SUITE_P(
787787
.target_type = uuid(),
788788
.expected_literal = Literal::UUID(
789789
Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value())},
790+
CastLiteralTestParam{.test_name = "StringToDate",
791+
.source_literal = Literal::String("2024-01-16"),
792+
.target_type = date(),
793+
.expected_literal = Literal::Date(19738)},
794+
CastLiteralTestParam{.test_name = "StringToTime",
795+
.source_literal = Literal::String("14:30"),
796+
.target_type = time(),
797+
.expected_literal = Literal::Time(52200000000LL)},
798+
CastLiteralTestParam{.test_name = "StringToTimestamp",
799+
.source_literal = Literal::String("2026-01-01T00:00:01.500"),
800+
.target_type = timestamp(),
801+
.expected_literal = Literal::Timestamp(1767225601500000L)},
802+
CastLiteralTestParam{
803+
.test_name = "StringToTimestampTz",
804+
.source_literal = Literal::String("2026-01-01T00:00:01.500+00:00"),
805+
.target_type = timestamp_tz(),
806+
.expected_literal = Literal::TimestampTz(1767225601500000L)},
807+
CastLiteralTestParam{.test_name = "StringToBinary",
808+
.source_literal = Literal::String("010203FF"),
809+
.target_type = binary(),
810+
.expected_literal = Literal::Binary(std::vector<uint8_t>{
811+
0x01, 0x02, 0x03, 0xFF})},
812+
CastLiteralTestParam{.test_name = "StringToFixed",
813+
.source_literal = Literal::String("01020304"),
814+
.target_type = fixed(4),
815+
.expected_literal = Literal::Fixed(std::vector<uint8_t>{
816+
0x01, 0x02, 0x03, 0x04})},
817+
CastLiteralTestParam{.test_name = "StringToDecimal",
818+
.source_literal = Literal::String("1234.56"),
819+
.target_type = decimal(6, 2),
820+
.expected_literal = Literal::Decimal(123456, 6, 2)},
790821
// Same type cast test
791822
CastLiteralTestParam{.test_name = "IntToInt",
792823
.source_literal = Literal::Int(42),

src/iceberg/test/transform_util_test.cc

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222
#include <gtest/gtest.h>
2323

24+
#include "iceberg/test/matchers.h"
25+
2426
namespace iceberg {
2527

2628
TEST(TransformUtilTest, HumanYear) {
@@ -157,4 +159,97 @@ TEST(TransformUtilTest, Base64Encode) {
157159
EXPECT_EQ("AA==", TransformUtil::Base64Encode({"\x00", 1}));
158160
}
159161

162+
struct ParseRoundTripParam {
163+
std::string name;
164+
std::string str;
165+
int64_t value;
166+
enum Kind { kDay, kTime, kTimestamp, kTimestampTz } kind;
167+
};
168+
169+
/// Value-parameterized fixture; each instance receives one ParseRoundTripParam.
class ParseRoundTripTest : public ::testing::TestWithParam<ParseRoundTripParam> {};
170+
171+
TEST_P(ParseRoundTripTest, RoundTrip) {
172+
const auto& param = GetParam();
173+
switch (param.kind) {
174+
case ParseRoundTripParam::kDay: {
175+
EXPECT_EQ(TransformUtil::HumanDay(static_cast<int32_t>(param.value)), param.str);
176+
ICEBERG_UNWRAP_OR_FAIL(auto parsed, TransformUtil::ParseDay(param.str));
177+
EXPECT_EQ(parsed, static_cast<int32_t>(param.value));
178+
break;
179+
}
180+
case ParseRoundTripParam::kTime: {
181+
EXPECT_EQ(TransformUtil::HumanTime(param.value), param.str);
182+
ICEBERG_UNWRAP_OR_FAIL(auto parsed, TransformUtil::ParseTime(param.str));
183+
EXPECT_EQ(parsed, param.value);
184+
break;
185+
}
186+
case ParseRoundTripParam::kTimestamp: {
187+
EXPECT_EQ(TransformUtil::HumanTimestamp(param.value), param.str);
188+
ICEBERG_UNWRAP_OR_FAIL(auto parsed, TransformUtil::ParseTimestamp(param.str));
189+
EXPECT_EQ(parsed, param.value);
190+
break;
191+
}
192+
case ParseRoundTripParam::kTimestampTz: {
193+
EXPECT_EQ(TransformUtil::HumanTimestampWithZone(param.value), param.str);
194+
ICEBERG_UNWRAP_OR_FAIL(auto parsed,
195+
TransformUtil::ParseTimestampWithZone(param.str));
196+
EXPECT_EQ(parsed, param.value);
197+
break;
198+
}
199+
}
200+
}
201+
202+
INSTANTIATE_TEST_SUITE_P(
203+
TransformUtilTest, ParseRoundTripTest,
204+
::testing::Values(
205+
// Day round-trips
206+
ParseRoundTripParam{"DayEpoch", "1970-01-01", 0, ParseRoundTripParam::kDay},
207+
ParseRoundTripParam{"DayNext", "1970-01-02", 1, ParseRoundTripParam::kDay},
208+
ParseRoundTripParam{"DayBeforeEpoch", "1969-12-31", -1,
209+
ParseRoundTripParam::kDay},
210+
ParseRoundTripParam{"DayYear999", "0999-12-31", -354286,
211+
ParseRoundTripParam::kDay},
212+
ParseRoundTripParam{"DayNonLeap", "1971-01-01", 365, ParseRoundTripParam::kDay},
213+
ParseRoundTripParam{"DayY2K", "2000-01-01", 10957, ParseRoundTripParam::kDay},
214+
ParseRoundTripParam{"Day2026", "2026-01-01", 20454, ParseRoundTripParam::kDay},
215+
// Time round-trips
216+
ParseRoundTripParam{"TimeMidnight", "00:00", 0, ParseRoundTripParam::kTime},
217+
ParseRoundTripParam{"TimeOneSec", "00:00:01", 1000000,
218+
ParseRoundTripParam::kTime},
219+
ParseRoundTripParam{"TimeMillis", "00:00:01.500", 1500000,
220+
ParseRoundTripParam::kTime},
221+
ParseRoundTripParam{"TimeOneMillis", "00:00:01.001", 1001000,
222+
ParseRoundTripParam::kTime},
223+
ParseRoundTripParam{"TimeMicros", "00:00:01.000001", 1000001,
224+
ParseRoundTripParam::kTime},
225+
ParseRoundTripParam{"TimeHourMinSec", "01:02:03", 3723000000,
226+
ParseRoundTripParam::kTime},
227+
ParseRoundTripParam{"TimeEndOfDay", "23:59:59", 86399000000,
228+
ParseRoundTripParam::kTime},
229+
// Timestamp round-trips
230+
ParseRoundTripParam{"TimestampEpoch", "1970-01-01T00:00:00", 0,
231+
ParseRoundTripParam::kTimestamp},
232+
ParseRoundTripParam{"TimestampOneSec", "1970-01-01T00:00:01", 1000000,
233+
ParseRoundTripParam::kTimestamp},
234+
ParseRoundTripParam{"TimestampMillis", "2026-01-01T00:00:01.500",
235+
1767225601500000L, ParseRoundTripParam::kTimestamp},
236+
ParseRoundTripParam{"TimestampOneMillis", "2026-01-01T00:00:01.001",
237+
1767225601001000L, ParseRoundTripParam::kTimestamp},
238+
ParseRoundTripParam{"TimestampMicros", "2026-01-01T00:00:01.000001",
239+
1767225601000001L, ParseRoundTripParam::kTimestamp},
240+
// TimestampTz round-trips
241+
ParseRoundTripParam{"TimestampTzEpoch", "1970-01-01T00:00:00+00:00", 0,
242+
ParseRoundTripParam::kTimestampTz},
243+
ParseRoundTripParam{"TimestampTzOneSec", "1970-01-01T00:00:01+00:00", 1000000,
244+
ParseRoundTripParam::kTimestampTz},
245+
ParseRoundTripParam{"TimestampTzMillis", "2026-01-01T00:00:01.500+00:00",
246+
1767225601500000L, ParseRoundTripParam::kTimestampTz},
247+
ParseRoundTripParam{"TimestampTzOneMillis", "2026-01-01T00:00:01.001+00:00",
248+
1767225601001000L, ParseRoundTripParam::kTimestampTz},
249+
ParseRoundTripParam{"TimestampTzMicros", "2026-01-01T00:00:01.000001+00:00",
250+
1767225601000001L, ParseRoundTripParam::kTimestampTz}),
251+
[](const ::testing::TestParamInfo<ParseRoundTripParam>& info) {
252+
return info.param.name;
253+
});
254+
160255
} // namespace iceberg

0 commit comments

Comments
 (0)