Skip to content

Commit ae4e144

Browse files
author
Innocent
committed
feat: literal to type conversion
1 parent 8bf089f commit ae4e144

File tree

8 files changed

+660
-10
lines changed

8 files changed

+660
-10
lines changed

src/iceberg/expression/json_serde.cc

Lines changed: 134 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -298,10 +298,140 @@ Result<nlohmann::json> ToJson(const Literal& literal) {
298298
}
299299
}
300300

301-
Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* /*type*/) {
302-
// TODO(gangwu): implement type-aware literal parsing equivalent to Java's
303-
// SingleValueParser.fromJson(type, node).
304-
return LiteralFromJson(json);
301+
/// \brief Parse a JSON single value into a Literal, guided by an optional expected type.
///
/// A `{"type": "literal", "value": <actual>}` wrapper object is unwrapped first and the
/// inner value is parsed with the same `type`. When `type` is null, parsing is delegated
/// to the untyped `LiteralFromJson(json)` overload.
///
/// \param json JSON node holding the value (optionally wrapped as described above).
/// \param type Expected type of the literal; may be nullptr.
/// \return The parsed Literal. A JSON parse error is returned when the node's JSON kind
/// does not match the expected type, when an integer does not fit the target width, when
/// a fixed value has the wrong length, or when a decimal's scale differs from the type's
/// scale. Errors from the underlying string parsers are propagated. Type ids without a
/// case below yield NotSupported.
Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* type) {
  // If {"type": "literal", "value": <actual>} wrapper is present, unwrap it first.
  // The is_string() guard matters: get<std::string>() throws on a non-string node, and
  // this function reports failures through Result rather than exceptions.
  if (json.is_object() && json.contains(kType) && json[kType].is_string() &&
      json[kType].get<std::string>() == kLiteral && json.contains(kValue)) {
    return LiteralFromJson(json[kValue], type);
  }
  // If no type context is provided, fall back to untyped parsing.
  if (type == nullptr) return LiteralFromJson(json);

  // Type-aware parsing equivalent to Java's SingleValueParser.fromJson(type, node).
  switch (type->type_id()) {
    case TypeId::kBoolean:
      if (!json.is_boolean()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a boolean value", SafeDumpJson(json));
      }
      return Literal::Boolean(json.get<bool>());

    case TypeId::kInt: {
      if (!json.is_number_integer()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as an int value", SafeDumpJson(json));
      }
      // Read at 64-bit width and verify the value survives narrowing, so that
      // out-of-range input is rejected instead of being silently truncated.
      const auto wide = json.get<int64_t>();
      const auto narrow = static_cast<int32_t>(wide);
      // An unsigned JSON number that reads back negative did not fit in int64_t either.
      const bool wrapped = json.is_number_unsigned() && wide < 0;
      if (wrapped || narrow != wide) [[unlikely]] {
        return JsonParseError("Cannot parse {} as an int value: out of range",
                              SafeDumpJson(json));
      }
      return Literal::Int(narrow);
    }

    case TypeId::kLong: {
      if (!json.is_number_integer()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a long value", SafeDumpJson(json));
      }
      const auto value = json.get<int64_t>();
      // An unsigned JSON number that reads back negative exceeded the int64_t range.
      if (json.is_number_unsigned() && value < 0) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a long value: out of range",
                              SafeDumpJson(json));
      }
      return Literal::Long(value);
    }

    case TypeId::kFloat:
      if (!json.is_number()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a float value", SafeDumpJson(json));
      }
      return Literal::Float(json.get<float>());

    case TypeId::kDouble:
      if (!json.is_number()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a double value", SafeDumpJson(json));
      }
      return Literal::Double(json.get<double>());

    case TypeId::kString:
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a string value", SafeDumpJson(json));
      }
      return Literal::String(json.get<std::string>());

    case TypeId::kDate: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a date value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto days,
                              TransformUtil::ParseDay(json.get<std::string>()));
      return Literal::Date(days);
    }

    case TypeId::kTime: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a time value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto micros,
                              TransformUtil::ParseTime(json.get<std::string>()));
      return Literal::Time(micros);
    }

    case TypeId::kTimestamp: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a timestamp value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto micros,
                              TransformUtil::ParseTimestamp(json.get<std::string>()));
      return Literal::Timestamp(micros);
    }

    case TypeId::kTimestampTz: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a timestamptz value",
                              SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(
          auto micros, TransformUtil::ParseTimestampWithZone(json.get<std::string>()));
      return Literal::TimestampTz(micros);
    }

    case TypeId::kUuid: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a uuid value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromString(json.get<std::string>()));
      return Literal::UUID(uuid);
    }

    case TypeId::kBinary: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a binary value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto bytes,
                              StringUtils::HexStringToBytes(json.get<std::string>()));
      return Literal::Binary(std::move(bytes));
    }

    case TypeId::kFixed: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a fixed value", SafeDumpJson(json));
      }
      const auto& fixed_type = internal::checked_cast<const FixedType&>(*type);
      const auto hex = json.get<std::string>();
      // Each byte is encoded as two hex characters, so the text must be twice the
      // declared fixed length.
      if (hex.size() != static_cast<size_t>(fixed_type.length()) * 2) [[unlikely]] {
        return JsonParseError("Cannot parse fixed[{}]: expected {} hex chars, got {}",
                              fixed_type.length(), fixed_type.length() * 2, hex.size());
      }
      ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(hex));
      return Literal::Fixed(std::move(bytes));
    }

    case TypeId::kDecimal: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a decimal value", SafeDumpJson(json));
      }
      const auto& dec_type = internal::checked_cast<const DecimalType&>(*type);
      int32_t parsed_scale = 0;
      ICEBERG_ASSIGN_OR_RAISE(
          auto dec, Decimal::FromString(json.get<std::string>(), nullptr, &parsed_scale));
      // The textual scale must equal the declared scale; no rescaling is attempted.
      if (parsed_scale != dec_type.scale()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a {} value: the scale doesn't match",
                              SafeDumpJson(json), type->ToString());
      }
      return Literal::Decimal(dec.value(), dec_type.precision(), dec_type.scale());
    }

    default:
      return NotSupported("Unsupported type for literal JSON parsing: {}",
                          type->ToString());
  }
}
306436

307437
Result<Literal> LiteralFromJson(const nlohmann::json& json) {

src/iceberg/expression/literal.cc

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,16 @@
2323
#include <concepts>
2424
#include <cstdint>
2525
#include <string>
26+
#include <vector>
2627

28+
#include "iceberg/type.h"
2729
#include "iceberg/util/checked_cast.h"
2830
#include "iceberg/util/conversions.h"
31+
#include "iceberg/util/decimal.h"
2932
#include "iceberg/util/macros.h"
33+
#include "iceberg/util/string_util.h"
3034
#include "iceberg/util/temporal_util.h"
35+
#include "iceberg/util/transform_util.h"
3136

3237
namespace iceberg {
3338

@@ -193,12 +198,42 @@ Result<Literal> LiteralCaster::CastFromString(
193198
ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromString(str_val));
194199
return Literal::UUID(uuid);
195200
}
196-
case TypeId::kDate:
197-
case TypeId::kTime:
198-
case TypeId::kTimestamp:
199-
case TypeId::kTimestampTz:
200-
return NotImplemented("Cast from String to {} is not implemented yet",
201-
target_type->ToString());
201+
case TypeId::kDate: {
202+
ICEBERG_ASSIGN_OR_RAISE(auto days, TransformUtil::ParseDay(str_val));
203+
return Literal::Date(days);
204+
}
205+
case TypeId::kTime: {
206+
ICEBERG_ASSIGN_OR_RAISE(auto micros, TransformUtil::ParseTime(str_val));
207+
return Literal::Time(micros);
208+
}
209+
case TypeId::kTimestamp: {
210+
ICEBERG_ASSIGN_OR_RAISE(auto micros, TransformUtil::ParseTimestamp(str_val));
211+
return Literal::Timestamp(micros);
212+
}
213+
case TypeId::kTimestampTz: {
214+
ICEBERG_ASSIGN_OR_RAISE(auto micros,
215+
TransformUtil::ParseTimestampWithZone(str_val));
216+
return Literal::TimestampTz(micros);
217+
}
218+
case TypeId::kBinary: {
219+
ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(str_val));
220+
return Literal::Binary(std::move(bytes));
221+
}
222+
case TypeId::kFixed: {
223+
ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(str_val));
224+
return Literal::Fixed(std::move(bytes));
225+
}
226+
case TypeId::kDecimal: {
227+
const auto& dec_type = internal::checked_cast<const DecimalType&>(*target_type);
228+
int32_t parsed_scale = 0;
229+
ICEBERG_ASSIGN_OR_RAISE(auto dec,
230+
Decimal::FromString(str_val, nullptr, &parsed_scale));
231+
if (parsed_scale != dec_type.scale()) {
232+
return InvalidArgument("Cannot cast {} as a {} value: the scale doesn't match",
233+
str_val, target_type->ToString());
234+
}
235+
return Literal::Decimal(dec.value(), dec_type.precision(), dec_type.scale());
236+
}
202237
default:
203238
return NotSupported("Cast from String to {} is not supported",
204239
target_type->ToString());

src/iceberg/test/expression_json_test.cc

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919

2020
#include <memory>
21+
#include <optional>
2122
#include <string>
2223
#include <vector>
2324

@@ -31,6 +32,7 @@
3132
#include "iceberg/expression/literal.h"
3233
#include "iceberg/expression/predicate.h"
3334
#include "iceberg/schema.h"
35+
#include "iceberg/schema_field.h"
3436
#include "iceberg/test/matchers.h"
3537
#include "iceberg/type.h"
3638

@@ -405,4 +407,99 @@ INSTANTIATE_TEST_SUITE_P(
405407
return info.param.name;
406408
});
407409

410+
// --- LiteralFromJson(json, type) type-aware tests ---
411+
412+
struct LiteralFromJsonTypedParam {
413+
std::string name;
414+
nlohmann::json json;
415+
std::shared_ptr<Type> type;
416+
TypeId expected_type_id;
417+
std::optional<std::string> expected_str;
418+
};
419+
420+
class LiteralFromJsonTypedTest
421+
: public ::testing::TestWithParam<LiteralFromJsonTypedParam> {};
422+
423+
TEST_P(LiteralFromJsonTypedTest, Parses) {
424+
const auto& p = GetParam();
425+
ICEBERG_UNWRAP_OR_FAIL(auto lit, LiteralFromJson(p.json, p.type.get()));
426+
EXPECT_EQ(lit.type()->type_id(), p.expected_type_id);
427+
if (p.expected_str) EXPECT_EQ(lit.ToString(), *p.expected_str);
428+
}
429+
430+
INSTANTIATE_TEST_SUITE_P(
431+
LiteralFromJsonTyped, LiteralFromJsonTypedTest,
432+
::testing::Values(
433+
LiteralFromJsonTypedParam{"Boolean", nlohmann::json(true), boolean(),
434+
TypeId::kBoolean, "true"},
435+
LiteralFromJsonTypedParam{"Int", nlohmann::json(123), int32(), TypeId::kInt,
436+
"123"},
437+
LiteralFromJsonTypedParam{"Long", nlohmann::json(9876543210LL), int64(),
438+
TypeId::kLong, "9876543210"},
439+
LiteralFromJsonTypedParam{"Float", nlohmann::json(1.5), float32(), TypeId::kFloat,
440+
std::nullopt},
441+
LiteralFromJsonTypedParam{"Double", nlohmann::json(3.14), float64(),
442+
TypeId::kDouble, std::nullopt},
443+
LiteralFromJsonTypedParam{"String", nlohmann::json("hello"), string(),
444+
TypeId::kString, std::nullopt},
445+
LiteralFromJsonTypedParam{"DateString", nlohmann::json("2024-01-15"), date(),
446+
TypeId::kDate, std::nullopt},
447+
LiteralFromJsonTypedParam{"Uuid",
448+
nlohmann::json("f79c3e09-677c-4bbd-a479-3f349cb785e7"),
449+
uuid(), TypeId::kUuid, std::nullopt},
450+
LiteralFromJsonTypedParam{"Binary", nlohmann::json("deadbeef"), binary(),
451+
TypeId::kBinary, std::nullopt},
452+
LiteralFromJsonTypedParam{"Fixed", nlohmann::json("cafebabe"), fixed(4),
453+
TypeId::kFixed, std::nullopt},
454+
LiteralFromJsonTypedParam{"DecimalMatchingScale", nlohmann::json("123.4500"),
455+
decimal(9, 4), TypeId::kDecimal, "123.4500"},
456+
LiteralFromJsonTypedParam{"DecimalScaleZero", nlohmann::json("2"), decimal(9, 0),
457+
TypeId::kDecimal, "2"}),
458+
[](const ::testing::TestParamInfo<LiteralFromJsonTypedParam>& info) {
459+
return info.param.name;
460+
});
461+
462+
struct InvalidLiteralFromJsonTypedParam {
463+
std::string name;
464+
nlohmann::json json;
465+
std::shared_ptr<Type> type;
466+
};
467+
468+
class InvalidLiteralFromJsonTypedTest
469+
: public ::testing::TestWithParam<InvalidLiteralFromJsonTypedParam> {};
470+
471+
TEST_P(InvalidLiteralFromJsonTypedTest, ReturnsError) {
472+
const auto& p = GetParam();
473+
EXPECT_FALSE(LiteralFromJson(p.json, p.type.get()).has_value());
474+
}
475+
476+
INSTANTIATE_TEST_SUITE_P(
477+
LiteralFromJsonTyped, InvalidLiteralFromJsonTypedTest,
478+
::testing::Values(
479+
InvalidLiteralFromJsonTypedParam{"BooleanTypeMismatch", nlohmann::json(42),
480+
boolean()},
481+
InvalidLiteralFromJsonTypedParam{"DateTypeMismatch", nlohmann::json(true),
482+
date()},
483+
InvalidLiteralFromJsonTypedParam{"UuidTypeMismatch", nlohmann::json(42), uuid()},
484+
InvalidLiteralFromJsonTypedParam{"BinaryInvalidHex", nlohmann::json("xyz"),
485+
binary()},
486+
InvalidLiteralFromJsonTypedParam{"FixedLengthMismatch", nlohmann::json("cafe12"),
487+
fixed(4)},
488+
InvalidLiteralFromJsonTypedParam{"DecimalScaleMismatch", nlohmann::json("123.45"),
489+
decimal(9, 4)},
490+
InvalidLiteralFromJsonTypedParam{"DecimalNotString", nlohmann::json(123.45),
491+
decimal(9, 2)}),
492+
[](const ::testing::TestParamInfo<InvalidLiteralFromJsonTypedParam>& info) {
493+
return info.param.name;
494+
});
495+
496+
TEST(LiteralFromJsonTyped, SchemaAwareDatePredicateRoundTrip) {
497+
auto schema = std::make_shared<Schema>(
498+
std::vector<SchemaField>{SchemaField::MakeOptional(1, "event_date", date())});
499+
nlohmann::json pred_json = {
500+
{"type", "eq"}, {"term", "event_date"}, {"value", "2024-01-15"}};
501+
ICEBERG_UNWRAP_OR_FAIL(auto expr, ExpressionFromJson(pred_json, schema.get()));
502+
ASSERT_NE(expr, nullptr);
503+
}
504+
408505
} // namespace iceberg

src/iceberg/test/literal_test.cc

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,37 @@ INSTANTIATE_TEST_SUITE_P(
787787
.target_type = uuid(),
788788
.expected_literal = Literal::UUID(
789789
Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value())},
790+
CastLiteralTestParam{.test_name = "StringToDate",
791+
.source_literal = Literal::String("2024-01-16"),
792+
.target_type = date(),
793+
.expected_literal = Literal::Date(19738)},
794+
CastLiteralTestParam{.test_name = "StringToTime",
795+
.source_literal = Literal::String("14:30"),
796+
.target_type = time(),
797+
.expected_literal = Literal::Time(52200000000LL)},
798+
CastLiteralTestParam{.test_name = "StringToTimestamp",
799+
.source_literal = Literal::String("2026-01-01T00:00:01.500"),
800+
.target_type = timestamp(),
801+
.expected_literal = Literal::Timestamp(1767225601500000L)},
802+
CastLiteralTestParam{
803+
.test_name = "StringToTimestampTz",
804+
.source_literal = Literal::String("2026-01-01T00:00:01.500+00:00"),
805+
.target_type = timestamp_tz(),
806+
.expected_literal = Literal::TimestampTz(1767225601500000L)},
807+
CastLiteralTestParam{.test_name = "StringToBinary",
808+
.source_literal = Literal::String("010203FF"),
809+
.target_type = binary(),
810+
.expected_literal = Literal::Binary(std::vector<uint8_t>{
811+
0x01, 0x02, 0x03, 0xFF})},
812+
CastLiteralTestParam{.test_name = "StringToFixed",
813+
.source_literal = Literal::String("01020304"),
814+
.target_type = fixed(4),
815+
.expected_literal = Literal::Fixed(std::vector<uint8_t>{
816+
0x01, 0x02, 0x03, 0x04})},
817+
CastLiteralTestParam{.test_name = "StringToDecimal",
818+
.source_literal = Literal::String("1234.56"),
819+
.target_type = decimal(6, 2),
820+
.expected_literal = Literal::Decimal(123456, 6, 2)},
790821
// Same type cast test
791822
CastLiteralTestParam{.test_name = "IntToInt",
792823
.source_literal = Literal::Int(42),

0 commit comments

Comments
 (0)