|
22 | 22 | #include <gtest/gtest.h> |
23 | 23 |
|
24 | 24 | #include "iceberg/expression/literal.h" |
| 25 | +#include "iceberg/test/matchers.h" |
25 | 26 |
|
26 | 27 | namespace iceberg { |
27 | 28 |
|
@@ -50,4 +51,141 @@ TEST(TruncateUtilTest, TruncateLiteral) { |
50 | 51 | Literal::Binary(std::vector<uint8_t>(expected.begin(), expected.end()))); |
51 | 52 | } |
52 | 53 |
|
| 54 | +TEST(TruncateUtilTest, TruncateBinaryMax) { |
| 55 | + std::vector<uint8_t> test1{1, 1, 2}; |
| 56 | + std::vector<uint8_t> test2{1, 1, 0xFF, 2}; |
| 57 | + std::vector<uint8_t> test3{0xFF, 0xFF, 0xFF, 2}; |
| 58 | + std::vector<uint8_t> test4{1, 1, 0}; |
| 59 | + std::vector<uint8_t> expected_output{1, 2}; |
| 60 | + |
| 61 | + // Test1: truncate {1, 1, 2} to 2 bytes -> {1, 2} |
| 62 | + ICEBERG_UNWRAP_OR_FAIL(auto result1, |
| 63 | + TruncateUtils::TruncateLiteralMax(Literal::Binary(test1), 2)); |
| 64 | + EXPECT_EQ(result1, Literal::Binary(expected_output)); |
| 65 | + |
| 66 | + // Test2: truncate {1, 1, 0xFF, 2} to 2 bytes -> {1, 2} |
| 67 | + ICEBERG_UNWRAP_OR_FAIL(auto result2, |
| 68 | + TruncateUtils::TruncateLiteralMax(Literal::Binary(test2), 2)); |
| 69 | + EXPECT_EQ(result2, Literal::Binary(expected_output)); |
| 70 | + |
| 71 | + // Test2b: truncate {1, 1, 0xFF, 2} to 3 bytes -> {1, 2} |
| 72 | + ICEBERG_UNWRAP_OR_FAIL(auto result2b, |
| 73 | + TruncateUtils::TruncateLiteralMax(Literal::Binary(test2), 3)); |
| 74 | + EXPECT_EQ(result2b, Literal::Binary(expected_output)); |
| 75 | + |
| 76 | + // Test3: no truncation needed when length >= input size |
| 77 | + ICEBERG_UNWRAP_OR_FAIL(auto result3, |
| 78 | + TruncateUtils::TruncateLiteralMax(Literal::Binary(test3), 5)); |
| 79 | + EXPECT_EQ(result3, Literal::Binary(test3)); |
| 80 | + |
| 81 | + // Test3b: cannot truncate when first bytes are all 0xFF |
| 82 | + EXPECT_THAT(TruncateUtils::TruncateLiteralMax(Literal::Binary(test3), 2), |
| 83 | + IsError(ErrorKind::kInvalidArgument)); |
| 84 | + |
| 85 | + // Test4: truncate {1, 1, 0} to 2 bytes -> {1, 2} |
| 86 | + ICEBERG_UNWRAP_OR_FAIL(auto result4, |
| 87 | + TruncateUtils::TruncateLiteralMax(Literal::Binary(test4), 2)); |
| 88 | + EXPECT_EQ(result4, Literal::Binary(expected_output)); |
| 89 | +} |
| 90 | + |
| 91 | +TEST(TruncateUtilTest, TruncateStringMax) { |
| 92 | + // Test1: Japanese characters "イロハニホヘト" |
| 93 | + std::string test1 = |
| 94 | + "\xE3\x82\xA4\xE3\x83\xAD\xE3\x83\x8F\xE3\x83\x8B\xE3\x83\x9B\xE3\x83\x98\xE3\x83" |
| 95 | + "\x88"; |
| 96 | + std::string test1_2_expected = "\xE3\x82\xA4\xE3\x83\xAE"; // "イヮ" |
| 97 | + std::string test1_3_expected = "\xE3\x82\xA4\xE3\x83\xAD\xE3\x83\x90"; // "イロバ" |
| 98 | + |
| 99 | + ICEBERG_UNWRAP_OR_FAIL(auto result1_2, |
| 100 | + TruncateUtils::TruncateLiteralMax(Literal::String(test1), 2)); |
| 101 | + EXPECT_EQ(result1_2, Literal::String(test1_2_expected)); |
| 102 | + |
| 103 | + ICEBERG_UNWRAP_OR_FAIL(auto result1_3, |
| 104 | + TruncateUtils::TruncateLiteralMax(Literal::String(test1), 3)); |
| 105 | + EXPECT_EQ(result1_3, Literal::String(test1_3_expected)); |
| 106 | + |
| 107 | + // No truncation needed when length >= input size |
| 108 | + ICEBERG_UNWRAP_OR_FAIL(auto result1_7, |
| 109 | + TruncateUtils::TruncateLiteralMax(Literal::String(test1), 7)); |
| 110 | + EXPECT_EQ(result1_7, Literal::String(test1)); |
| 111 | + |
| 112 | + ICEBERG_UNWRAP_OR_FAIL(auto result1_8, |
| 113 | + TruncateUtils::TruncateLiteralMax(Literal::String(test1), 8)); |
| 114 | + EXPECT_EQ(result1_8, Literal::String(test1)); |
| 115 | + |
| 116 | + // Test2: Mixed characters "щщаεはчωいにπάほхεろへσκζ" |
| 117 | + std::string test2 = |
| 118 | + "\xD1\x89\xD1\x89\xD0\xB0\xCE\xB5\xE3\x81\xAF\xD1\x87\xCF\x89\xE3\x81\x84\xE3\x81" |
| 119 | + "\xAB\xCF\x80\xCE\xAC\xE3\x81\xBB\xD1\x85\xCE\xB5\xE3\x82\x8D\xE3\x81\xB8\xCF\x83" |
| 120 | + "\xCE\xBA\xCE\xB6"; |
| 121 | + std::string test2_7_expected = |
| 122 | + "\xD1\x89\xD1\x89\xD0\xB0\xCE\xB5\xE3\x81\xAF\xD1\x87\xCF\x8A"; // "щщаεはчϊ" |
| 123 | + |
| 124 | + ICEBERG_UNWRAP_OR_FAIL(auto result2_7, |
| 125 | + TruncateUtils::TruncateLiteralMax(Literal::String(test2), 7)); |
| 126 | + EXPECT_EQ(result2_7, Literal::String(test2_7_expected)); |
| 127 | + |
| 128 | + // Test3: String with max 3-byte UTF-8 character "aनि\uFFFF\uFFFF" |
| 129 | + std::string test3 = "a\xE0\xA4\xA8\xE0\xA4\xBF\xEF\xBF\xBF\xEF\xBF\xBF"; |
| 130 | + std::string test3_3_expected = "a\xE0\xA4\xA8\xE0\xA5\x80"; // "aनी" |
| 131 | + |
| 132 | + ICEBERG_UNWRAP_OR_FAIL(auto result3_3, |
| 133 | + TruncateUtils::TruncateLiteralMax(Literal::String(test3), 3)); |
| 134 | + EXPECT_EQ(result3_3, Literal::String(test3_3_expected)); |
| 135 | + |
| 136 | + // Test4: Max 3-byte UTF-8 character "\uFFFF\uFFFF" |
| 137 | + std::string test4 = "\xEF\xBF\xBF\xEF\xBF\xBF"; |
| 138 | + std::string test4_1_expected = "\xF0\x90\x80\x80"; // U+10000 (first 4-byte UTF-8 char) |
| 139 | + |
| 140 | + ICEBERG_UNWRAP_OR_FAIL(auto result4_1, |
| 141 | + TruncateUtils::TruncateLiteralMax(Literal::String(test4), 1)); |
| 142 | + EXPECT_EQ(result4_1, Literal::String(test4_1_expected)); |
| 143 | + |
| 144 | + // Test5: Max 4-byte UTF-8 characters "\uDBFF\uDFFF\uDBFF\uDFFF" |
| 145 | + std::string test5 = "\xF4\x8F\xBF\xBF\xF4\x8F\xBF\xBF"; // U+10FFFF U+10FFFF |
| 146 | + EXPECT_THAT(TruncateUtils::TruncateLiteralMax(Literal::String(test5), 1), |
| 147 | + IsError(ErrorKind::kInvalidArgument)); |
| 148 | + |
| 149 | + // Test6: 4-byte UTF-8 character "\uD800\uDFFF\uD800\uDFFF" |
| 150 | + std::string test6 = "\xF0\x90\x8F\xBF\xF0\x90\x8F\xBF"; // U+103FF U+103FF |
| 151 | + std::string test6_1_expected = "\xF0\x90\x90\x80"; // U+10400 |
| 152 | + |
| 153 | + ICEBERG_UNWRAP_OR_FAIL(auto result6_1, |
| 154 | + TruncateUtils::TruncateLiteralMax(Literal::String(test6), 1)); |
| 155 | + EXPECT_EQ(result6_1, Literal::String(test6_1_expected)); |
| 156 | + |
| 157 | + // Test7: Emoji "\uD83D\uDE02\uD83D\uDE02\uD83D\uDE02" |
| 158 | + std::string test7 = "\xF0\x9F\x98\x82\xF0\x9F\x98\x82\xF0\x9F\x98\x82"; // 😂😂😂 |
| 159 | + std::string test7_2_expected = "\xF0\x9F\x98\x82\xF0\x9F\x98\x83"; // 😂😃 |
| 160 | + std::string test7_1_expected = "\xF0\x9F\x98\x83"; // 😃 |
| 161 | + |
| 162 | + ICEBERG_UNWRAP_OR_FAIL(auto result7_2, |
| 163 | + TruncateUtils::TruncateLiteralMax(Literal::String(test7), 2)); |
| 164 | + EXPECT_EQ(result7_2, Literal::String(test7_2_expected)); |
| 165 | + |
| 166 | + ICEBERG_UNWRAP_OR_FAIL(auto result7_1, |
| 167 | + TruncateUtils::TruncateLiteralMax(Literal::String(test7), 1)); |
| 168 | + EXPECT_EQ(result7_1, Literal::String(test7_1_expected)); |
| 169 | + |
| 170 | + // Test8: Overflow case "a\uDBFF\uDFFFc" |
| 171 | + std::string test8 = |
| 172 | + "a\xF4\x8F\xBF\xBF" |
| 173 | + "c"; // a U+10FFFF c |
| 174 | + std::string test8_2_expected = "b"; |
| 175 | + |
| 176 | + ICEBERG_UNWRAP_OR_FAIL(auto result8_2, |
| 177 | + TruncateUtils::TruncateLiteralMax(Literal::String(test8), 2)); |
| 178 | + EXPECT_EQ(result8_2, Literal::String(test8_2_expected)); |
| 179 | + |
| 180 | + // Test9: Skip surrogate range "a" + (char)(Character.MIN_SURROGATE - 1) + "b" |
| 181 | + std::string test9 = |
| 182 | + "a\xED\x9F\xBF" |
| 183 | + "b"; // a U+D7FF b |
| 184 | + std::string test9_2_expected = "a\xEE\x80\x80"; // a U+E000 |
| 185 | + |
| 186 | + ICEBERG_UNWRAP_OR_FAIL(auto result9_2, |
| 187 | + TruncateUtils::TruncateLiteralMax(Literal::String(test9), 2)); |
| 188 | + EXPECT_EQ(result9_2, Literal::String(test9_2_expected)); |
| 189 | +} |
| 190 | + |
53 | 191 | } // namespace iceberg |
0 commit comments