Skip to content

Commit 2bbb95f

Browse files
author
Innocent
committed
feat: literal to type conversion
1 parent 8bf089f commit 2bbb95f

File tree

8 files changed

+644
-10
lines changed

8 files changed

+644
-10
lines changed

src/iceberg/expression/json_serde.cc

Lines changed: 128 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -298,10 +298,134 @@ Result<nlohmann::json> ToJson(const Literal& literal) {
298298
}
299299
}
300300

301-
Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* /*type*/) {
302-
// TODO(gangwu): implement type-aware literal parsing equivalent to Java's
303-
// SingleValueParser.fromJson(type, node).
304-
return LiteralFromJson(json);
301+
// Parses `json` into a Literal of the Iceberg type `type`; the type-aware counterpart
// of Java's SingleValueParser.fromJson(type, node).
//
// - A {"type": "literal", "value": <actual>} wrapper is unwrapped first.
// - A null `type` falls back to the untyped LiteralFromJson(json).
// - Boolean, numeric and string types are read from the matching JSON scalar; date,
//   time, timestamp, timestamptz, uuid, binary, fixed and decimal are read from a JSON
//   string.
//
// Returns a JsonParseError when the JSON node has the wrong kind, when an integer does
// not fit the target width, when a fixed value has the wrong length, or when a decimal
// has the wrong scale. Errors from the underlying string parsers are propagated, and
// any other type id yields NotSupported.
Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* type) {
  // The tag is tested with is_string() before get<std::string>() because the latter
  // throws on a non-string node instead of producing an error Result.
  if (json.is_object() && json.contains(kType) && json[kType].is_string() &&
      json[kType].get<std::string>() == kLiteral && json.contains(kValue)) {
    return LiteralFromJson(json[kValue], type);
  }

  // If no type context is provided, fall back to untyped parsing.
  if (type == nullptr) {
    return LiteralFromJson(json);
  }

  switch (type->type_id()) {
    case TypeId::kBoolean: {
      if (!json.is_boolean()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a boolean value", SafeDumpJson(json));
      }
      return Literal::Boolean(json.get<bool>());
    }

    case TypeId::kInt: {
      if (!json.is_number_integer()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as an int value", SafeDumpJson(json));
      }
      // get<int32_t>() would silently truncate, so read 64 bits and verify the value
      // survives narrowing. A negative result for an unsigned node means the stored
      // value was larger than the int64 range.
      const auto wide = json.get<int64_t>();
      const auto narrow = static_cast<int32_t>(wide);
      if ((json.is_number_unsigned() && wide < 0) ||
          static_cast<int64_t>(narrow) != wide) [[unlikely]] {
        return JsonParseError("Cannot parse {} as an int value: out of range",
                              SafeDumpJson(json));
      }
      return Literal::Int(narrow);
    }

    case TypeId::kLong: {
      if (!json.is_number_integer()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a long value", SafeDumpJson(json));
      }
      const auto value = json.get<int64_t>();
      // An unsigned node above the int64 maximum wraps to a negative number.
      if (json.is_number_unsigned() && value < 0) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a long value: out of range",
                              SafeDumpJson(json));
      }
      return Literal::Long(value);
    }

    case TypeId::kFloat: {
      if (!json.is_number()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a float value", SafeDumpJson(json));
      }
      return Literal::Float(json.get<float>());
    }

    case TypeId::kDouble: {
      if (!json.is_number()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a double value", SafeDumpJson(json));
      }
      return Literal::Double(json.get<double>());
    }

    case TypeId::kString: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a string value", SafeDumpJson(json));
      }
      return Literal::String(json.get<std::string>());
    }

    case TypeId::kDate: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a date value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto days,
                              TransformUtil::ParseDay(json.get<std::string>()));
      return Literal::Date(days);
    }

    case TypeId::kTime: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a time value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto micros,
                              TransformUtil::ParseTime(json.get<std::string>()));
      return Literal::Time(micros);
    }

    case TypeId::kTimestamp: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a timestamp value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto micros,
                              TransformUtil::ParseTimestamp(json.get<std::string>()));
      return Literal::Timestamp(micros);
    }

    case TypeId::kTimestampTz: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a timestamptz value",
                              SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(
          auto micros, TransformUtil::ParseTimestampWithZone(json.get<std::string>()));
      return Literal::TimestampTz(micros);
    }

    case TypeId::kUuid: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a uuid value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromString(json.get<std::string>()));
      return Literal::UUID(uuid);
    }

    case TypeId::kBinary: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a binary value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto bytes,
                              StringUtils::HexStringToBytes(json.get<std::string>()));
      return Literal::Binary(std::move(bytes));
    }

    case TypeId::kFixed: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a fixed value", SafeDumpJson(json));
      }
      const auto& fixed_type = internal::checked_cast<const FixedType&>(*type);
      const std::string hex = json.get<std::string>();
      // Two hex characters encode one byte, so the text must be exactly 2 * length.
      if (hex.size() != static_cast<size_t>(fixed_type.length()) * 2) [[unlikely]] {
        return JsonParseError("Cannot parse fixed[{}]: expected {} hex chars, got {}",
                              fixed_type.length(), fixed_type.length() * 2, hex.size());
      }
      ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(hex));
      return Literal::Fixed(std::move(bytes));
    }

    case TypeId::kDecimal: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a decimal value", SafeDumpJson(json));
      }
      const auto& dec_type = internal::checked_cast<const DecimalType&>(*type);
      int32_t parsed_scale = 0;
      ICEBERG_ASSIGN_OR_RAISE(
          auto dec, Decimal::FromString(json.get<std::string>(), nullptr, &parsed_scale));
      // The textual scale must equal the declared scale; no rescaling is attempted.
      if (parsed_scale != dec_type.scale()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a {} value: the scale doesn't match",
                              SafeDumpJson(json), type->ToString());
      }
      return Literal::Decimal(dec.value(), dec_type.precision(), dec_type.scale());
    }

    default:
      return NotSupported("Unsupported type for literal JSON parsing: {}",
                          type->ToString());
  }
}
306430

307431
Result<Literal> LiteralFromJson(const nlohmann::json& json) {

src/iceberg/expression/literal.cc

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,16 @@
2323
#include <concepts>
2424
#include <cstdint>
2525
#include <string>
26+
#include <vector>
2627

28+
#include "iceberg/type.h"
2729
#include "iceberg/util/checked_cast.h"
2830
#include "iceberg/util/conversions.h"
31+
#include "iceberg/util/decimal.h"
2932
#include "iceberg/util/macros.h"
33+
#include "iceberg/util/string_util.h"
3034
#include "iceberg/util/temporal_util.h"
35+
#include "iceberg/util/transform_util.h"
3136

3237
namespace iceberg {
3338

@@ -193,12 +198,42 @@ Result<Literal> LiteralCaster::CastFromString(
193198
ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromString(str_val));
194199
return Literal::UUID(uuid);
195200
}
196-
case TypeId::kDate:
197-
case TypeId::kTime:
198-
case TypeId::kTimestamp:
199-
case TypeId::kTimestampTz:
200-
return NotImplemented("Cast from String to {} is not implemented yet",
201-
target_type->ToString());
201+
case TypeId::kDate: {
202+
ICEBERG_ASSIGN_OR_RAISE(auto days, TransformUtil::ParseDay(str_val));
203+
return Literal::Date(days);
204+
}
205+
case TypeId::kTime: {
206+
ICEBERG_ASSIGN_OR_RAISE(auto micros, TransformUtil::ParseTime(str_val));
207+
return Literal::Time(micros);
208+
}
209+
case TypeId::kTimestamp: {
210+
ICEBERG_ASSIGN_OR_RAISE(auto micros, TransformUtil::ParseTimestamp(str_val));
211+
return Literal::Timestamp(micros);
212+
}
213+
case TypeId::kTimestampTz: {
214+
ICEBERG_ASSIGN_OR_RAISE(auto micros,
215+
TransformUtil::ParseTimestampWithZone(str_val));
216+
return Literal::TimestampTz(micros);
217+
}
218+
case TypeId::kBinary: {
219+
ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(str_val));
220+
return Literal::Binary(std::move(bytes));
221+
}
222+
case TypeId::kFixed: {
223+
ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(str_val));
224+
return Literal::Fixed(std::move(bytes));
225+
}
226+
case TypeId::kDecimal: {
227+
const auto& dec_type = internal::checked_cast<const DecimalType&>(*target_type);
228+
int32_t parsed_scale = 0;
229+
ICEBERG_ASSIGN_OR_RAISE(auto dec,
230+
Decimal::FromString(str_val, nullptr, &parsed_scale));
231+
if (parsed_scale != dec_type.scale()) {
232+
return InvalidArgument("Cannot cast {} as a {} value: the scale doesn't match",
233+
str_val, target_type->ToString());
234+
}
235+
return Literal::Decimal(dec.value(), dec_type.precision(), dec_type.scale());
236+
}
202237
default:
203238
return NotSupported("Cast from String to {} is not supported",
204239
target_type->ToString());

src/iceberg/test/expression_json_test.cc

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919

2020
#include <memory>
21+
#include <optional>
2122
#include <string>
2223
#include <vector>
2324

@@ -31,6 +32,7 @@
3132
#include "iceberg/expression/literal.h"
3233
#include "iceberg/expression/predicate.h"
3334
#include "iceberg/schema.h"
35+
#include "iceberg/schema_field.h"
3436
#include "iceberg/test/matchers.h"
3537
#include "iceberg/type.h"
3638

@@ -405,4 +407,99 @@ INSTANTIATE_TEST_SUITE_P(
405407
return info.param.name;
406408
});
407409

410+
// --- LiteralFromJson(json, type) type-aware tests ---
411+
412+
// One success case for the type-aware LiteralFromJson(json, type) overload.
struct LiteralFromJsonTypedParam {
413+
// Case label; the suite's name generator returns this as the test name.
std::string name;
414+
// JSON input handed to LiteralFromJson.
nlohmann::json json;
415+
// Target type; the test passes type.get() to LiteralFromJson.
std::shared_ptr<Type> type;
416+
// Type id the parsed literal is expected to report.
TypeId expected_type_id;
417+
// Expected Literal::ToString() output; std::nullopt skips the string comparison.
std::optional<std::string> expected_str;
418+
};
419+
420+
class LiteralFromJsonTypedTest
421+
: public ::testing::TestWithParam<LiteralFromJsonTypedParam> {};
422+
423+
TEST_P(LiteralFromJsonTypedTest, Parses) {
424+
const auto& p = GetParam();
425+
ICEBERG_UNWRAP_OR_FAIL(auto lit, LiteralFromJson(p.json, p.type.get()));
426+
EXPECT_EQ(lit.type()->type_id(), p.expected_type_id);
427+
if (p.expected_str) EXPECT_EQ(lit.ToString(), *p.expected_str);
428+
}
429+
430+
// Success cases for LiteralFromJsonTypedTest. Each entry is
// {name, json, type, expected type id, expected ToString()}; entries whose last field is
// std::nullopt only have their type id checked. The list has no time, timestamp or
// timestamptz entries.
INSTANTIATE_TEST_SUITE_P(
431+
LiteralFromJsonTyped, LiteralFromJsonTypedTest,
432+
::testing::Values(
433+
LiteralFromJsonTypedParam{"Boolean", nlohmann::json(true), boolean(),
434+
TypeId::kBoolean, "true"},
435+
LiteralFromJsonTypedParam{"Int", nlohmann::json(123), int32(), TypeId::kInt,
436+
"123"},
437+
LiteralFromJsonTypedParam{"Long", nlohmann::json(9876543210LL), int64(),
438+
TypeId::kLong, "9876543210"},
439+
LiteralFromJsonTypedParam{"Float", nlohmann::json(1.5), float32(), TypeId::kFloat,
440+
std::nullopt},
441+
LiteralFromJsonTypedParam{"Double", nlohmann::json(3.14), float64(),
442+
TypeId::kDouble, std::nullopt},
443+
LiteralFromJsonTypedParam{"String", nlohmann::json("hello"), string(),
444+
TypeId::kString, std::nullopt},
445+
LiteralFromJsonTypedParam{"DateString", nlohmann::json("2024-01-15"), date(),
446+
TypeId::kDate, std::nullopt},
447+
LiteralFromJsonTypedParam{"Uuid",
448+
nlohmann::json("f79c3e09-677c-4bbd-a479-3f349cb785e7"),
449+
uuid(), TypeId::kUuid, std::nullopt},
450+
LiteralFromJsonTypedParam{"Binary", nlohmann::json("deadbeef"), binary(),
451+
TypeId::kBinary, std::nullopt},
452+
LiteralFromJsonTypedParam{"Fixed", nlohmann::json("cafebabe"), fixed(4),
453+
TypeId::kFixed, std::nullopt},
454+
LiteralFromJsonTypedParam{"DecimalMatchingScale", nlohmann::json("123.4500"),
455+
decimal(9, 4), TypeId::kDecimal, "123.4500"},
456+
LiteralFromJsonTypedParam{"DecimalScaleZero", nlohmann::json("2"), decimal(9, 0),
457+
TypeId::kDecimal, "2"}),
458+
[](const ::testing::TestParamInfo<LiteralFromJsonTypedParam>& info) {
459+
return info.param.name;
460+
});
461+
462+
// One failure case for the type-aware LiteralFromJson(json, type) overload.
struct InvalidLiteralFromJsonTypedParam {
463+
// Case label; the suite's name generator returns this as the test name.
std::string name;
464+
// JSON input that is expected to be rejected.
nlohmann::json json;
465+
// Target type; the test passes type.get() to LiteralFromJson.
std::shared_ptr<Type> type;
466+
};
467+
468+
class InvalidLiteralFromJsonTypedTest
469+
: public ::testing::TestWithParam<InvalidLiteralFromJsonTypedParam> {};
470+
471+
TEST_P(InvalidLiteralFromJsonTypedTest, ReturnsError) {
472+
const auto& p = GetParam();
473+
EXPECT_FALSE(LiteralFromJson(p.json, p.type.get()).has_value());
474+
}
475+
476+
// Failure cases for InvalidLiteralFromJsonTypedTest, each {name, json, type}: JSON of the
// wrong kind for the type, a non-hex binary string, a fixed value whose hex length does
// not match fixed(4), and decimal inputs with a mismatched scale or a non-string node.
INSTANTIATE_TEST_SUITE_P(
477+
LiteralFromJsonTyped, InvalidLiteralFromJsonTypedTest,
478+
::testing::Values(
479+
InvalidLiteralFromJsonTypedParam{"BooleanTypeMismatch", nlohmann::json(42),
480+
boolean()},
481+
InvalidLiteralFromJsonTypedParam{"DateTypeMismatch", nlohmann::json(true),
482+
date()},
483+
InvalidLiteralFromJsonTypedParam{"UuidTypeMismatch", nlohmann::json(42), uuid()},
484+
InvalidLiteralFromJsonTypedParam{"BinaryInvalidHex", nlohmann::json("xyz"),
485+
binary()},
486+
InvalidLiteralFromJsonTypedParam{"FixedLengthMismatch", nlohmann::json("cafe12"),
487+
fixed(4)},
488+
InvalidLiteralFromJsonTypedParam{"DecimalScaleMismatch", nlohmann::json("123.45"),
489+
decimal(9, 4)},
490+
InvalidLiteralFromJsonTypedParam{"DecimalNotString", nlohmann::json(123.45),
491+
decimal(9, 2)}),
492+
[](const ::testing::TestParamInfo<InvalidLiteralFromJsonTypedParam>& info) {
493+
return info.param.name;
494+
});
495+
496+
// Parses an "eq" predicate on a date column with the schema supplied, so the string
// value goes through schema-aware expression parsing. Only successful parsing and a
// non-null expression are asserted.
TEST(LiteralFromJsonTyped, SchemaAwareDatePredicateRoundTrip) {
  auto date_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "event_date", date())});

  nlohmann::json predicate_json = {
      {"type", "eq"}, {"term", "event_date"}, {"value", "2024-01-15"}};

  ICEBERG_UNWRAP_OR_FAIL(auto parsed,
                         ExpressionFromJson(predicate_json, date_schema.get()));
  ASSERT_NE(parsed, nullptr);
}
504+
408505
} // namespace iceberg

src/iceberg/test/literal_test.cc

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,37 @@ INSTANTIATE_TEST_SUITE_P(
787787
.target_type = uuid(),
788788
.expected_literal = Literal::UUID(
789789
Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value())},
790+
CastLiteralTestParam{.test_name = "StringToDate",
791+
.source_literal = Literal::String("2024-01-16"),
792+
.target_type = date(),
793+
.expected_literal = Literal::Date(19738)},
794+
CastLiteralTestParam{.test_name = "StringToTime",
795+
.source_literal = Literal::String("14:30"),
796+
.target_type = time(),
797+
.expected_literal = Literal::Time(52200000000LL)},
798+
CastLiteralTestParam{.test_name = "StringToTimestamp",
799+
.source_literal = Literal::String("2026-01-01T00:00:01.500"),
800+
.target_type = timestamp(),
801+
.expected_literal = Literal::Timestamp(1767225601500000L)},
802+
CastLiteralTestParam{
803+
.test_name = "StringToTimestampTz",
804+
.source_literal = Literal::String("2026-01-01T00:00:01.500+00:00"),
805+
.target_type = timestamp_tz(),
806+
.expected_literal = Literal::TimestampTz(1767225601500000L)},
807+
CastLiteralTestParam{.test_name = "StringToBinary",
808+
.source_literal = Literal::String("010203FF"),
809+
.target_type = binary(),
810+
.expected_literal = Literal::Binary(std::vector<uint8_t>{
811+
0x01, 0x02, 0x03, 0xFF})},
812+
CastLiteralTestParam{.test_name = "StringToFixed",
813+
.source_literal = Literal::String("01020304"),
814+
.target_type = fixed(4),
815+
.expected_literal = Literal::Fixed(std::vector<uint8_t>{
816+
0x01, 0x02, 0x03, 0x04})},
817+
CastLiteralTestParam{.test_name = "StringToDecimal",
818+
.source_literal = Literal::String("1234.56"),
819+
.target_type = decimal(6, 2),
820+
.expected_literal = Literal::Decimal(123456, 6, 2)},
790821
// Same type cast test
791822
CastLiteralTestParam{.test_name = "IntToInt",
792823
.source_literal = Literal::Int(42),

0 commit comments

Comments
 (0)