Skip to content

Commit 8c53971

Browse files
committed
complete
1 parent 7d7b9ec commit 8c53971

File tree

2 files changed

+195
-7
lines changed

2 files changed

+195
-7
lines changed

src/iceberg/expression/literal.cc

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -683,22 +683,18 @@ Result<Literal> LiteralSerializer::FromBytes(std::span<const uint8_t> data,
683683
}
684684

685685
case TypeId::kString: {
686-
// UTF-8 bytes (without length) - any size is valid
687686
return Literal::String(
688687
std::string(reinterpret_cast<const char*>(data.data()), data.size()));
689688
}
690689

691690
case TypeId::kBinary: {
692-
// Binary value (without length) - any size is valid
693691
return Literal::Binary(std::vector<uint8_t>(data.begin(), data.end()));
694692
}
695693

696694
case TypeId::kFixed: {
697-
// Fixed(L) - Binary value
698-
// Store in std::vector<uint8_t> or std::array<uint8_t, 16> depending on size
699695
if (data.size() == 16) {
700696
std::array<uint8_t, 16> fixed_bytes;
701-
std::copy(data.begin(), data.end(), fixed_bytes.begin());
697+
std::ranges::copy(data, fixed_bytes.begin());
702698
return Literal(Literal::Value{fixed_bytes}, type);
703699
} else {
704700
return Literal(Literal::Value{std::vector<uint8_t>(data.begin(), data.end())},
@@ -715,8 +711,6 @@ Result<Literal> LiteralSerializer::FromBytes(std::span<const uint8_t> data,
715711
}
716712

717713
case TypeId::kDecimal: {
718-
// Decimal values can have variable length, but we store them in std::array<uint8_t,
719-
// 16>
720714
if (data.size() > 16) {
721715
return InvalidArgument(
722716
"Decimal data too large, maximum 16 bytes supported, got {}", data.size());

test/literal_test.cc

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include "iceberg/expression/literal.h"
2121

22+
#include <algorithm>
2223
#include <limits>
2324
#include <numbers>
2425
#include <vector>
@@ -383,4 +384,197 @@ TEST(LiteralTest, DoubleZeroComparison) {
383384
EXPECT_EQ(neg_zero <=> pos_zero, std::partial_ordering::less);
384385
}
385386

387+
void CheckBinaryRoundTrip(const std::vector<uint8_t>& input_bytes,
388+
const Literal& expected_literal,
389+
std::shared_ptr<PrimitiveType> type) {
390+
// Deserialize from bytes
391+
auto literal_result = Literal::Deserialize(input_bytes, type);
392+
ASSERT_TRUE(literal_result.has_value());
393+
394+
// Check type and value are correct
395+
EXPECT_EQ(literal_result->type()->type_id(), expected_literal.type()->type_id());
396+
EXPECT_EQ(literal_result->ToString(), expected_literal.ToString());
397+
398+
// Serialize back to bytes
399+
auto bytes_result = literal_result->Serialize();
400+
ASSERT_TRUE(bytes_result.has_value());
401+
EXPECT_EQ(*bytes_result, input_bytes);
402+
403+
// Deserialize again to verify
404+
auto final_literal = Literal::Deserialize(*bytes_result, type);
405+
ASSERT_TRUE(final_literal.has_value());
406+
EXPECT_EQ(final_literal->type()->type_id(), expected_literal.type()->type_id());
407+
EXPECT_EQ(final_literal->ToString(), expected_literal.ToString());
408+
}
409+
410+
// Boolean binary serialization tests
411+
TEST(LiteralSerializationTest, BinaryBoolean) {
412+
CheckBinaryRoundTrip({1}, Literal::Boolean(true), boolean());
413+
CheckBinaryRoundTrip({0}, Literal::Boolean(false), boolean());
414+
}
415+
416+
// Integer binary serialization tests
417+
TEST(LiteralSerializationTest, BinaryInt) {
418+
CheckBinaryRoundTrip({32, 0, 0, 0}, Literal::Int(32), int32());
419+
}
420+
421+
// Long binary serialization tests
422+
TEST(LiteralSerializationTest, BinaryLong) {
423+
CheckBinaryRoundTrip({32, 0, 0, 0, 0, 0, 0, 0}, Literal::Long(32), int64());
424+
}
425+
426+
// Float binary serialization tests
427+
TEST(LiteralSerializationTest, BinaryFloat) {
428+
CheckBinaryRoundTrip({0, 0, 128, 63}, Literal::Float(1.0f), float32());
429+
}
430+
431+
// Double binary serialization tests
432+
TEST(LiteralSerializationTest, BinaryDouble) {
433+
CheckBinaryRoundTrip({0, 0, 0, 0, 0, 0, 240, 63}, Literal::Double(1.0), float64());
434+
}
435+
436+
// String binary serialization tests
437+
TEST(LiteralSerializationTest, BinaryString) {
438+
CheckBinaryRoundTrip({105, 99, 101, 98, 101, 114, 103}, Literal::String("iceberg"),
439+
string());
440+
}
441+
442+
// Binary data type serialization tests
443+
TEST(LiteralSerializationTest, BinaryData) {
444+
std::vector<uint8_t> data = {0x01, 0x02, 0x03, 0xFF};
445+
CheckBinaryRoundTrip(data, Literal::Binary(data), binary());
446+
}
447+
448+
// Type promotion tests - smaller types can be deserialized as larger types
449+
TEST(LiteralSerializationTest, TypePromotion) {
450+
// 4-byte int data can be deserialized as long
451+
std::vector<uint8_t> int_data = {32, 0, 0, 0};
452+
auto long_result = Literal::Deserialize(int_data, int64());
453+
ASSERT_TRUE(long_result.has_value());
454+
EXPECT_EQ(long_result->type()->type_id(), TypeId::kLong);
455+
EXPECT_EQ(long_result->ToString(), "32");
456+
457+
auto long_bytes = long_result->Serialize();
458+
ASSERT_TRUE(long_bytes.has_value());
459+
EXPECT_EQ(long_bytes->size(), 8);
460+
461+
// 4-byte float data can be deserialized as double
462+
std::vector<uint8_t> float_data = {0, 0, 128, 63};
463+
auto double_result = Literal::Deserialize(float_data, float64());
464+
ASSERT_TRUE(double_result.has_value());
465+
EXPECT_EQ(double_result->type()->type_id(), TypeId::kDouble);
466+
EXPECT_EQ(double_result->ToString(), "1.000000");
467+
468+
auto double_bytes = double_result->Serialize();
469+
ASSERT_TRUE(double_bytes.has_value());
470+
EXPECT_EQ(double_bytes->size(), 8);
471+
}
472+
473+
// Null value serialization tests
474+
TEST(LiteralSerializationTest, NullValues) {
475+
// Empty byte array should deserialize to null
476+
auto null_result = Literal::Deserialize({}, int32());
477+
ASSERT_TRUE(null_result.has_value());
478+
EXPECT_TRUE(null_result->IsNull());
479+
480+
// Null value serialization should return empty byte array
481+
auto null_literal = Literal::Null(int32());
482+
auto bytes_result = null_literal.Serialize();
483+
ASSERT_TRUE(bytes_result.has_value());
484+
EXPECT_TRUE(bytes_result->empty());
485+
}
486+
487+
// Edge case serialization tests
488+
TEST(LiteralSerializationTest, EdgeCases) {
489+
// Negative integers
490+
CheckBinaryRoundTrip({224, 255, 255, 255}, Literal::Int(-32), int32());
491+
CheckBinaryRoundTrip({224, 255, 255, 255, 255, 255, 255, 255}, Literal::Long(-32),
492+
int64());
493+
494+
// Empty string special handling: empty string -> empty bytes -> null conversion
495+
auto empty_string = Literal::String("");
496+
auto empty_bytes = empty_string.Serialize();
497+
ASSERT_TRUE(empty_bytes.has_value());
498+
EXPECT_TRUE(empty_bytes->empty());
499+
500+
// Empty bytes deserialize to null, not empty string
501+
auto null_result = Literal::Deserialize(*empty_bytes, string());
502+
ASSERT_TRUE(null_result.has_value());
503+
EXPECT_TRUE(null_result->IsNull());
504+
505+
// Special floating point value serialization
506+
auto nan_float = Literal::Float(std::numeric_limits<float>::quiet_NaN());
507+
auto nan_bytes = nan_float.Serialize();
508+
ASSERT_TRUE(nan_bytes.has_value());
509+
EXPECT_EQ(nan_bytes->size(), 4);
510+
511+
auto inf_float = Literal::Float(std::numeric_limits<float>::infinity());
512+
auto inf_bytes = inf_float.Serialize();
513+
ASSERT_TRUE(inf_bytes.has_value());
514+
EXPECT_EQ(inf_bytes->size(), 4);
515+
}
516+
517+
// Error case serialization tests
518+
TEST(LiteralSerializationTest, ErrorCases) {
519+
// AboveMax/BelowMin values cannot be serialized
520+
auto long_literal =
521+
Literal::Long(static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + 1);
522+
auto above_max_result = long_literal.CastTo(int32());
523+
ASSERT_TRUE(above_max_result.has_value());
524+
EXPECT_TRUE(above_max_result->IsAboveMax());
525+
526+
auto serialize_result = above_max_result->Serialize();
527+
EXPECT_FALSE(serialize_result.has_value());
528+
529+
// Insufficient data size for deserialization should fail
530+
std::vector<uint8_t> insufficient_int_data = {0x01}; // Need 4 bytes but only have 1
531+
auto insufficient_data = Literal::Deserialize(insufficient_int_data, int32());
532+
EXPECT_FALSE(insufficient_data.has_value());
533+
534+
std::vector<uint8_t> insufficient_long_data = {
535+
0x01, 0x02}; // Need 4 or 8 bytes but only have 2
536+
auto insufficient_long = Literal::Deserialize(insufficient_long_data, int64());
537+
EXPECT_FALSE(insufficient_long.has_value());
538+
539+
// Oversized decimal data should fail
540+
std::vector<uint8_t> oversized_decimal(20, 0xFF); // Exceeds 16-byte limit
541+
auto oversized_result = Literal::Deserialize(oversized_decimal, decimal(10, 2));
542+
EXPECT_FALSE(oversized_result.has_value());
543+
}
544+
545+
// Fixed/UUID/Decimal type serialization tests
546+
TEST(LiteralSerializationTest, FixedUuidDecimal) {
547+
// Fixed type - 16 bytes
548+
std::vector<uint8_t> fixed_16_data(16, 0x42);
549+
auto fixed_result = Literal::Deserialize(fixed_16_data, fixed(16));
550+
ASSERT_TRUE(fixed_result.has_value());
551+
auto fixed_bytes = fixed_result->Serialize();
552+
ASSERT_TRUE(fixed_bytes.has_value());
553+
EXPECT_EQ(*fixed_bytes, fixed_16_data);
554+
555+
// Fixed type - other sizes
556+
std::vector<uint8_t> fixed_8_data(8, 0x33);
557+
auto fixed_8_result = Literal::Deserialize(fixed_8_data, fixed(8));
558+
ASSERT_TRUE(fixed_8_result.has_value());
559+
auto fixed_8_bytes = fixed_8_result->Serialize();
560+
ASSERT_TRUE(fixed_8_bytes.has_value());
561+
EXPECT_EQ(*fixed_8_bytes, fixed_8_data);
562+
563+
// UUID type
564+
std::vector<uint8_t> uuid_data(16, 0x55);
565+
auto uuid_result = Literal::Deserialize(uuid_data, uuid());
566+
ASSERT_TRUE(uuid_result.has_value());
567+
auto uuid_bytes = uuid_result->Serialize();
568+
ASSERT_TRUE(uuid_bytes.has_value());
569+
EXPECT_EQ(*uuid_bytes, uuid_data);
570+
571+
// Decimal type
572+
std::vector<uint8_t> decimal_data(16, 0x99);
573+
auto decimal_result = Literal::Deserialize(decimal_data, decimal(10, 2));
574+
ASSERT_TRUE(decimal_result.has_value());
575+
auto decimal_bytes = decimal_result->Serialize();
576+
ASSERT_TRUE(decimal_bytes.has_value());
577+
EXPECT_EQ(*decimal_bytes, decimal_data);
578+
}
579+
386580
} // namespace iceberg

0 commit comments

Comments
 (0)