Skip to content

Commit e1439d8

Browse files
author
Innocent
committed
feat: literal to type conversaion
1 parent 8bf089f commit e1439d8

File tree

8 files changed

+526
-10
lines changed

8 files changed

+526
-10
lines changed

src/iceberg/expression/json_serde.cc

Lines changed: 120 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -298,10 +298,126 @@ Result<nlohmann::json> ToJson(const Literal& literal) {
298298
}
299299
}
300300

301-
Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* /*type*/) {
302-
// TODO(gangwu): implement type-aware literal parsing equivalent to Java's
303-
// SingleValueParser.fromJson(type, node).
304-
return LiteralFromJson(json);
301+
Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* type) {
302+
// If {"type": "literal", "value": <actual>} wrapper is present, unwrap it first.
303+
if (json.is_object() && json.contains(kType) &&
304+
json[kType].get<std::string>() == kLiteral && json.contains(kValue)) {
305+
return LiteralFromJson(json[kValue], type);
306+
}
307+
// If no type context is provided, fall back to untyped parsing.
308+
if (type == nullptr) return LiteralFromJson(json);
309+
310+
// Type-aware parsing equivalent to Java's SingleValueParser.fromJson(type, node).
311+
switch (type->type_id()) {
312+
case TypeId::kBoolean:
313+
if (!json.is_boolean()) [[unlikely]]
314+
return JsonParseError("Cannot parse {} as a boolean value", SafeDumpJson(json));
315+
return Literal::Boolean(json.get<bool>());
316+
317+
case TypeId::kInt:
318+
if (!json.is_number_integer()) [[unlikely]]
319+
return JsonParseError("Cannot parse {} as an int value", SafeDumpJson(json));
320+
return Literal::Int(json.get<int32_t>());
321+
322+
case TypeId::kLong:
323+
if (!json.is_number_integer()) [[unlikely]]
324+
return JsonParseError("Cannot parse {} as a long value", SafeDumpJson(json));
325+
return Literal::Long(json.get<int64_t>());
326+
327+
case TypeId::kFloat:
328+
if (!json.is_number()) [[unlikely]]
329+
return JsonParseError("Cannot parse {} as a float value", SafeDumpJson(json));
330+
return Literal::Float(json.get<float>());
331+
332+
case TypeId::kDouble:
333+
if (!json.is_number()) [[unlikely]]
334+
return JsonParseError("Cannot parse {} as a double value", SafeDumpJson(json));
335+
return Literal::Double(json.get<double>());
336+
337+
case TypeId::kString:
338+
if (!json.is_string()) [[unlikely]]
339+
return JsonParseError("Cannot parse {} as a string value", SafeDumpJson(json));
340+
return Literal::String(json.get<std::string>());
341+
342+
// For temporal types (date, time, timestamp, timestamp_tz), we support both integer
343+
// and string representations.
344+
case TypeId::kDate:
345+
if (json.is_number_integer()) return Literal::Date(json.get<int32_t>());
346+
if (json.is_string()) {
347+
ICEBERG_ASSIGN_OR_RAISE(auto days,
348+
TransformUtil::ParseDay(json.get<std::string>()));
349+
return Literal::Date(days);
350+
}
351+
return JsonParseError("Cannot parse {} as a date value", SafeDumpJson(json));
352+
353+
case TypeId::kTime:
354+
if (json.is_number_integer()) return Literal::Time(json.get<int64_t>());
355+
if (json.is_string()) {
356+
ICEBERG_ASSIGN_OR_RAISE(auto micros,
357+
TransformUtil::ParseTime(json.get<std::string>()));
358+
return Literal::Time(micros);
359+
}
360+
return JsonParseError("Cannot parse {} as a time value", SafeDumpJson(json));
361+
362+
case TypeId::kTimestamp:
363+
if (json.is_number_integer()) return Literal::Timestamp(json.get<int64_t>());
364+
if (json.is_string()) {
365+
ICEBERG_ASSIGN_OR_RAISE(auto micros,
366+
TransformUtil::ParseTimestamp(json.get<std::string>()));
367+
return Literal::Timestamp(micros);
368+
}
369+
return JsonParseError("Cannot parse {} as a timestamp value", SafeDumpJson(json));
370+
371+
case TypeId::kTimestampTz:
372+
if (json.is_number_integer()) return Literal::TimestampTz(json.get<int64_t>());
373+
if (json.is_string()) {
374+
ICEBERG_ASSIGN_OR_RAISE(
375+
auto micros, TransformUtil::ParseTimestampWithZone(json.get<std::string>()));
376+
return Literal::TimestampTz(micros);
377+
}
378+
return JsonParseError("Cannot parse {} as a timestamptz value", SafeDumpJson(json));
379+
380+
case TypeId::kUuid:
381+
if (json.is_string()) {
382+
ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromString(json.get<std::string>()));
383+
return Literal::UUID(uuid);
384+
}
385+
return JsonParseError("Cannot parse {} as a uuid value", SafeDumpJson(json));
386+
387+
case TypeId::kBinary:
388+
if (json.is_string()) {
389+
ICEBERG_ASSIGN_OR_RAISE(auto bytes,
390+
StringUtils::HexStringToBytes(json.get<std::string>()));
391+
return Literal::Binary(std::move(bytes));
392+
}
393+
return JsonParseError("Cannot parse {} as a binary value", SafeDumpJson(json));
394+
395+
case TypeId::kFixed: {
396+
if (json.is_string()) {
397+
const auto& fixed_type = internal::checked_cast<const FixedType&>(*type);
398+
const std::string& hex = json.get<std::string>();
399+
if (hex.size() != static_cast<size_t>(fixed_type.length()) * 2) [[unlikely]]
400+
return JsonParseError("Cannot parse fixed[{}]: expected {} hex chars, got {}",
401+
fixed_type.length(), fixed_type.length() * 2, hex.size());
402+
ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(hex));
403+
return Literal::Fixed(std::move(bytes));
404+
}
405+
return JsonParseError("Cannot parse {} as a fixed value", SafeDumpJson(json));
406+
}
407+
408+
case TypeId::kDecimal: {
409+
if (json.is_string()) {
410+
const auto& dec_type = internal::checked_cast<const DecimalType&>(*type);
411+
ICEBERG_ASSIGN_OR_RAISE(auto dec, Decimal::FromString(json.get<std::string>()));
412+
return Literal::Decimal(dec.value(), dec_type.precision(), dec_type.scale());
413+
}
414+
return JsonParseError("Cannot parse {} as a decimal value", SafeDumpJson(json));
415+
}
416+
417+
default:
418+
return NotSupported("Unsupported type for literal JSON parsing: {}",
419+
type->ToString());
420+
}
305421
}
306422

307423
Result<Literal> LiteralFromJson(const nlohmann::json& json) {

src/iceberg/expression/literal.cc

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,16 @@
2323
#include <concepts>
2424
#include <cstdint>
2525
#include <string>
26+
#include <vector>
2627

28+
#include "iceberg/type.h"
2729
#include "iceberg/util/checked_cast.h"
2830
#include "iceberg/util/conversions.h"
31+
#include "iceberg/util/decimal.h"
2932
#include "iceberg/util/macros.h"
33+
#include "iceberg/util/string_util.h"
3034
#include "iceberg/util/temporal_util.h"
35+
#include "iceberg/util/transform_util.h"
3136

3237
namespace iceberg {
3338

@@ -193,12 +198,36 @@ Result<Literal> LiteralCaster::CastFromString(
193198
ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromString(str_val));
194199
return Literal::UUID(uuid);
195200
}
196-
case TypeId::kDate:
197-
case TypeId::kTime:
198-
case TypeId::kTimestamp:
199-
case TypeId::kTimestampTz:
200-
return NotImplemented("Cast from String to {} is not implemented yet",
201-
target_type->ToString());
201+
case TypeId::kDate: {
202+
ICEBERG_ASSIGN_OR_RAISE(auto days, TransformUtil::ParseDay(str_val));
203+
return Literal::Date(days);
204+
}
205+
case TypeId::kTime: {
206+
ICEBERG_ASSIGN_OR_RAISE(auto micros, TransformUtil::ParseTime(str_val));
207+
return Literal::Time(micros);
208+
}
209+
case TypeId::kTimestamp: {
210+
ICEBERG_ASSIGN_OR_RAISE(auto micros, TransformUtil::ParseTimestamp(str_val));
211+
return Literal::Timestamp(micros);
212+
}
213+
case TypeId::kTimestampTz: {
214+
ICEBERG_ASSIGN_OR_RAISE(auto micros,
215+
TransformUtil::ParseTimestampWithZone(str_val));
216+
return Literal::TimestampTz(micros);
217+
}
218+
case TypeId::kBinary: {
219+
ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(str_val));
220+
return Literal::Binary(std::move(bytes));
221+
}
222+
case TypeId::kFixed: {
223+
ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(str_val));
224+
return Literal::Fixed(std::move(bytes));
225+
}
226+
case TypeId::kDecimal: {
227+
const auto& dec_type = internal::checked_cast<const DecimalType&>(*target_type);
228+
ICEBERG_ASSIGN_OR_RAISE(auto dec, Decimal::FromString(str_val));
229+
return Literal::Decimal(dec.value(), dec_type.precision(), dec_type.scale());
230+
}
202231
default:
203232
return NotSupported("Cast from String to {} is not supported",
204233
target_type->ToString());

src/iceberg/test/expression_json_test.cc

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919

2020
#include <memory>
21+
#include <optional>
2122
#include <string>
2223
#include <vector>
2324

@@ -31,6 +32,7 @@
3132
#include "iceberg/expression/literal.h"
3233
#include "iceberg/expression/predicate.h"
3334
#include "iceberg/schema.h"
35+
#include "iceberg/schema_field.h"
3436
#include "iceberg/test/matchers.h"
3537
#include "iceberg/type.h"
3638

@@ -405,4 +407,93 @@ INSTANTIATE_TEST_SUITE_P(
405407
return info.param.name;
406408
});
407409

410+
// --- LiteralFromJson(json, type) type-aware tests ---
411+
412+
struct LiteralFromJsonTypedParam {
413+
std::string name;
414+
nlohmann::json json;
415+
std::shared_ptr<Type> type;
416+
TypeId expected_type_id;
417+
std::optional<std::string> expected_str;
418+
};
419+
420+
class LiteralFromJsonTypedTest
421+
: public ::testing::TestWithParam<LiteralFromJsonTypedParam> {};
422+
423+
TEST_P(LiteralFromJsonTypedTest, Parses) {
424+
const auto& p = GetParam();
425+
ICEBERG_UNWRAP_OR_FAIL(auto lit, LiteralFromJson(p.json, p.type.get()));
426+
EXPECT_EQ(lit.type()->type_id(), p.expected_type_id);
427+
if (p.expected_str) EXPECT_EQ(lit.ToString(), *p.expected_str);
428+
}
429+
430+
INSTANTIATE_TEST_SUITE_P(
431+
LiteralFromJsonTyped, LiteralFromJsonTypedTest,
432+
::testing::Values(LiteralFromJsonTypedParam{"Boolean", nlohmann::json(true),
433+
boolean(), TypeId::kBoolean, "true"},
434+
LiteralFromJsonTypedParam{"Int", nlohmann::json(123), int32(),
435+
TypeId::kInt, "123"},
436+
LiteralFromJsonTypedParam{"Long", nlohmann::json(9876543210LL),
437+
int64(), TypeId::kLong, "9876543210"},
438+
LiteralFromJsonTypedParam{"Float", nlohmann::json(1.5), float32(),
439+
TypeId::kFloat, std::nullopt},
440+
LiteralFromJsonTypedParam{"Double", nlohmann::json(3.14), float64(),
441+
TypeId::kDouble, std::nullopt},
442+
LiteralFromJsonTypedParam{"String", nlohmann::json("hello"),
443+
string(), TypeId::kString, std::nullopt},
444+
LiteralFromJsonTypedParam{"DateString",
445+
nlohmann::json("2024-01-15"), date(),
446+
TypeId::kDate, std::nullopt},
447+
LiteralFromJsonTypedParam{"DateOrdinal", nlohmann::json(19738),
448+
date(), TypeId::kDate, std::nullopt},
449+
LiteralFromJsonTypedParam{
450+
"Uuid", nlohmann::json("f79c3e09-677c-4bbd-a479-3f349cb785e7"),
451+
uuid(), TypeId::kUuid, std::nullopt},
452+
LiteralFromJsonTypedParam{"Binary", nlohmann::json("deadbeef"),
453+
binary(), TypeId::kBinary, std::nullopt},
454+
LiteralFromJsonTypedParam{"Fixed", nlohmann::json("cafebabe"),
455+
fixed(4), TypeId::kFixed, std::nullopt}),
456+
[](const ::testing::TestParamInfo<LiteralFromJsonTypedParam>& info) {
457+
return info.param.name;
458+
});
459+
460+
struct InvalidLiteralFromJsonTypedParam {
461+
std::string name;
462+
nlohmann::json json;
463+
std::shared_ptr<Type> type;
464+
};
465+
466+
class InvalidLiteralFromJsonTypedTest
467+
: public ::testing::TestWithParam<InvalidLiteralFromJsonTypedParam> {};
468+
469+
TEST_P(InvalidLiteralFromJsonTypedTest, ReturnsError) {
470+
const auto& p = GetParam();
471+
EXPECT_FALSE(LiteralFromJson(p.json, p.type.get()).has_value());
472+
}
473+
474+
INSTANTIATE_TEST_SUITE_P(
475+
LiteralFromJsonTyped, InvalidLiteralFromJsonTypedTest,
476+
::testing::Values(InvalidLiteralFromJsonTypedParam{"BooleanTypeMismatch",
477+
nlohmann::json(42), boolean()},
478+
InvalidLiteralFromJsonTypedParam{"DateTypeMismatch",
479+
nlohmann::json(true), date()},
480+
InvalidLiteralFromJsonTypedParam{"UuidTypeMismatch",
481+
nlohmann::json(42), uuid()},
482+
InvalidLiteralFromJsonTypedParam{"BinaryInvalidHex",
483+
nlohmann::json("xyz"), binary()},
484+
InvalidLiteralFromJsonTypedParam{
485+
"FixedLengthMismatch", nlohmann::json("cafe12"), fixed(4)}),
486+
[](const ::testing::TestParamInfo<InvalidLiteralFromJsonTypedParam>& info) {
487+
return info.param.name;
488+
});
489+
490+
TEST(LiteralFromJsonTyped, SchemaAwareDatePredicateRoundTrip) {
491+
auto schema = std::make_shared<Schema>(
492+
std::vector<SchemaField>{SchemaField::MakeOptional(1, "event_date", date())});
493+
nlohmann::json pred_json = {
494+
{"type", "eq"}, {"term", "event_date"}, {"value", "2024-01-15"}};
495+
ICEBERG_UNWRAP_OR_FAIL(auto expr, ExpressionFromJson(pred_json, schema.get()));
496+
ASSERT_NE(expr, nullptr);
497+
}
498+
408499
} // namespace iceberg

src/iceberg/test/literal_test.cc

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,37 @@ INSTANTIATE_TEST_SUITE_P(
787787
.target_type = uuid(),
788788
.expected_literal = Literal::UUID(
789789
Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value())},
790+
CastLiteralTestParam{.test_name = "StringToDate",
791+
.source_literal = Literal::String("2024-01-16"),
792+
.target_type = date(),
793+
.expected_literal = Literal::Date(19738)},
794+
CastLiteralTestParam{.test_name = "StringToTime",
795+
.source_literal = Literal::String("14:30"),
796+
.target_type = time(),
797+
.expected_literal = Literal::Time(52200000000LL)},
798+
CastLiteralTestParam{.test_name = "StringToTimestamp",
799+
.source_literal = Literal::String("2026-01-01T00:00:01.500"),
800+
.target_type = timestamp(),
801+
.expected_literal = Literal::Timestamp(1767225601500000L)},
802+
CastLiteralTestParam{
803+
.test_name = "StringToTimestampTz",
804+
.source_literal = Literal::String("2026-01-01T00:00:01.500+00:00"),
805+
.target_type = timestamp_tz(),
806+
.expected_literal = Literal::TimestampTz(1767225601500000L)},
807+
CastLiteralTestParam{.test_name = "StringToBinary",
808+
.source_literal = Literal::String("010203FF"),
809+
.target_type = binary(),
810+
.expected_literal = Literal::Binary(std::vector<uint8_t>{
811+
0x01, 0x02, 0x03, 0xFF})},
812+
CastLiteralTestParam{.test_name = "StringToFixed",
813+
.source_literal = Literal::String("01020304"),
814+
.target_type = fixed(4),
815+
.expected_literal = Literal::Fixed(std::vector<uint8_t>{
816+
0x01, 0x02, 0x03, 0x04})},
817+
CastLiteralTestParam{.test_name = "StringToDecimal",
818+
.source_literal = Literal::String("1234.56"),
819+
.target_type = decimal(6, 2),
820+
.expected_literal = Literal::Decimal(123456, 6, 2)},
790821
// Same type cast test
791822
CastLiteralTestParam{.test_name = "IntToInt",
792823
.source_literal = Literal::Int(42),

0 commit comments

Comments
 (0)