|
17 | 17 | #include "paimon/common/utils/arrow/arrow_utils.h" |
18 | 18 |
|
19 | 19 | #include "arrow/api.h" |
| 20 | +#include "arrow/ipc/api.h" |
20 | 21 | #include "gtest/gtest.h" |
| 22 | +#include "paimon/common/types/data_field.h" |
21 | 23 | #include "paimon/testing/utils/testharness.h" |
22 | | - |
23 | 24 | namespace paimon::test { |
24 | 25 |
|
25 | 26 | TEST(ArrowUtilsTest, TestCreateProjection) { |
@@ -107,4 +108,232 @@ TEST(ArrowUtilsTest, TestCreateProjection) { |
107 | 108 | } |
108 | 109 | } |
109 | 110 |
|
// Exercises ArrowUtils::CheckNullabilityMatch with a single int32 column
// ("column1") that is declared non-nullable. The input data is a struct array
// built from JSON, where each row is written as a JSON array of field values.
| 111 | +TEST(ArrowUtilsTest, TestCheckNullableMatchSimple) { |
| 112 | + auto field = arrow::field("column1", arrow::int32(), /*nullable=*/false); |
| 113 | + auto schema = arrow::schema({field}); |
// Case 1: the second row holds a null for column1, so the check is expected
// to fail with a message naming column1.
| 114 | + { |
| 115 | + std::shared_ptr<arrow::Array> array = |
| 116 | + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ |
| 117 | + [20], |
| 118 | + [null], |
| 119 | + [10] |
| 120 | +])") |
| 121 | + .ValueOrDie(); |
| 122 | + |
| 123 | + ASSERT_NOK_WITH_MSG( |
| 124 | + ArrowUtils::CheckNullabilityMatch(schema, array), |
| 125 | + "CheckNullabilityMatch failed, field column1 not nullable while data have null value"); |
| 126 | + } |
// Case 2: no row contains a null, so the check is expected to succeed.
| 127 | + { |
| 128 | + std::shared_ptr<arrow::Array> array = |
| 129 | + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ |
| 130 | + [20], |
| 131 | + [10] |
| 132 | +])") |
| 133 | + .ValueOrDie(); |
| 134 | + |
| 135 | + ASSERT_OK(ArrowUtils::CheckNullabilityMatch(schema, array)); |
| 136 | + } |
| 137 | +} |
| 138 | + |
// Exercises ArrowUtils::CheckNullabilityMatch with a non-nullable struct column
// ("parent") whose children are child1 (int32, non-nullable) and child2
// (float64, nullable).
| 139 | +TEST(ArrowUtilsTest, TestCheckNullableMatchWithStruct) { |
| 140 | + auto child1 = arrow::field("child1", arrow::int32(), /*nullable=*/false); |
| 141 | + auto child2 = arrow::field("child2", arrow::float64(), /*nullable=*/true); |
| 142 | + auto struct_field = |
| 143 | + arrow::field("parent", arrow::struct_({child1, child2}), /*nullable=*/false); |
| 144 | + auto schema = arrow::schema({struct_field}); |
// Case 1: the parent struct value itself is null; the expected error names
// "parent".
| 145 | + { |
| 146 | + std::shared_ptr<arrow::Array> array = |
| 147 | + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({struct_field}), R"([ |
| 148 | + [null] |
| 149 | +])") |
| 150 | + .ValueOrDie(); |
| 151 | + ASSERT_NOK_WITH_MSG( |
| 152 | + ArrowUtils::CheckNullabilityMatch(schema, array), |
| 153 | + "CheckNullabilityMatch failed, field parent not nullable while data have null value"); |
| 154 | + } |
// Case 2: the first row has a null child2 (declared nullable) and the second
// row has a null child1 (declared non-nullable); the expected error names
// "child1".
| 155 | + { |
| 156 | + std::shared_ptr<arrow::Array> array = |
| 157 | + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({struct_field}), R"([ |
| 158 | + [[1, null]], |
| 159 | + [[null, 10.0]] |
| 160 | +])") |
| 161 | + .ValueOrDie(); |
| 162 | + ASSERT_NOK_WITH_MSG( |
| 163 | + ArrowUtils::CheckNullabilityMatch(schema, array), |
| 164 | + "CheckNullabilityMatch failed, field child1 not nullable while data have null value"); |
| 165 | + } |
// Case 3: the only null is in child2, which is declared nullable, so the check
// is expected to succeed.
| 166 | + { |
| 167 | + std::shared_ptr<arrow::Array> array = |
| 168 | + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({struct_field}), R"([ |
| 169 | + [[1, null]], |
| 170 | + [[2, 10.0]] |
| 171 | +])") |
| 172 | + .ValueOrDie(); |
| 173 | + ASSERT_OK(ArrowUtils::CheckNullabilityMatch(schema, array)); |
| 174 | + } |
| 175 | +} |
| 176 | + |
// Exercises ArrowUtils::CheckNullabilityMatch with a non-nullable list column
// ("list_column") whose element field ("value", int32) is also non-nullable.
| 177 | +TEST(ArrowUtilsTest, TestCheckNullableMatchWithList) { |
| 178 | + auto value_field = arrow::field("value", arrow::int32(), /*nullable=*/false); |
| 179 | + auto list_field = arrow::field("list_column", arrow::list(value_field), /*nullable=*/false); |
| 180 | + auto schema = arrow::schema({list_field}); |
| 181 | + |
// Case 1: the data contains both a null element (first row) and a null list
// (second row); the expected error names the outer field "list_column".
| 182 | + { |
| 183 | + std::shared_ptr<arrow::Array> array = |
| 184 | + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({list_field}), R"([ |
| 185 | + [[1, 2, null, 4, 5]], |
| 186 | + [null] |
| 187 | +])") |
| 188 | + .ValueOrDie(); |
| 189 | + ASSERT_NOK_WITH_MSG(ArrowUtils::CheckNullabilityMatch(schema, array), |
| 190 | + "CheckNullabilityMatch failed, field list_column not nullable while " |
| 191 | + "data have null value"); |
| 192 | + } |
// Case 2: the list is present but one of its elements is null; the expected
// error names the element field "value".
| 193 | + { |
| 194 | + std::shared_ptr<arrow::Array> array = |
| 195 | + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({list_field}), R"([ |
| 196 | + [[1, 2, null, 4, 5]] |
| 197 | +])") |
| 198 | + .ValueOrDie(); |
| 199 | + ASSERT_NOK_WITH_MSG( |
| 200 | + ArrowUtils::CheckNullabilityMatch(schema, array), |
| 201 | + "CheckNullabilityMatch failed, field value not nullable while data have null value"); |
| 202 | + } |
// Case 3: neither the list nor any element is null, so the check is expected
// to succeed.
| 203 | + { |
| 204 | + std::shared_ptr<arrow::Array> array = |
| 205 | + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({list_field}), R"([ |
| 206 | + [[1, 2, 3, 4, 5]] |
| 207 | +])") |
| 208 | + .ValueOrDie(); |
| 209 | + ASSERT_OK(ArrowUtils::CheckNullabilityMatch(schema, array)); |
| 210 | + } |
| 211 | +} |
| 212 | + |
// Exercises ArrowUtils::CheckNullabilityMatch with a non-nullable map column
// ("map_column") whose key field is non-nullable int32 and whose value field
// is nullable int32.
| 213 | +TEST(ArrowUtilsTest, TestCheckNullableMatchWithMap) { |
| 214 | + auto key_field = arrow::field("key", arrow::int32(), /*nullable=*/false); |
| 215 | + auto value_field = arrow::field("value", arrow::int32(), /*nullable=*/true); |
| 216 | + auto map_type = std::make_shared<arrow::MapType>(key_field, value_field); |
| 217 | + auto map_field = arrow::field("map_column", map_type, /*nullable=*/false); |
| 218 | + auto schema = arrow::schema({map_field}); |
| 219 | + |
// Case 1: the map value itself is null; the expected error names "map_column".
| 220 | + { |
| 221 | + std::shared_ptr<arrow::Array> array = |
| 222 | + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({map_field}), R"([ |
| 223 | + [null] |
| 224 | +])") |
| 225 | + .ValueOrDie(); |
| 226 | + ASSERT_NOK_WITH_MSG(ArrowUtils::CheckNullabilityMatch(schema, array), |
| 227 | + "CheckNullabilityMatch failed, field map_column not nullable while " |
| 228 | + "data have null value"); |
| 229 | + } |
// Case 2: a single entry with key 1 and a null value; the value field is
// declared nullable, so the check is expected to succeed.
| 230 | + { |
| 231 | + std::shared_ptr<arrow::Array> array = |
| 232 | + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({map_field}), R"([ |
| 233 | + [[[1, null]]] |
| 234 | +])") |
| 235 | + .ValueOrDie(); |
| 236 | + ASSERT_OK(ArrowUtils::CheckNullabilityMatch(schema, array)); |
| 237 | + } |
| 238 | +} |
| 239 | + |
// Exercises ArrowUtils::CheckNullabilityMatch on nested types. The schema has
// three non-nullable map columns keyed by utf8:
//   inner1: map value is a nullable list ("inner_list") of non-nullable int32
//   inner2: map value is a nullable map ("inner_map") from utf8 to
//           non-nullable int32
//   inner3: map value is a nullable struct ("inner_struct") with non-nullable
//           int32 members "key" and "value"
// Every JSON row supplies the three columns in that order.
| 240 | +TEST(ArrowUtilsTest, TestCheckNullableMatchComplex) { |
| 241 | + auto key_field = arrow::field("key", arrow::int32(), /*nullable=*/false); |
| 242 | + auto value_field = arrow::field("value", arrow::int32(), /*nullable=*/false); |
| 243 | + |
| 244 | + auto inner_child1 = |
| 245 | + arrow::field("inner1", |
| 246 | + arrow::map(arrow::utf8(), arrow::field("inner_list", arrow::list(value_field), |
| 247 | + /*nullable=*/true)), |
| 248 | + /*nullable=*/false); |
| 249 | + auto inner_child2 = arrow::field( |
| 250 | + "inner2", |
| 251 | + arrow::map(arrow::utf8(), arrow::field("inner_map", arrow::map(arrow::utf8(), value_field), |
| 252 | + /*nullable=*/true)), |
| 253 | + /*nullable=*/false); |
| 254 | + auto inner_child3 = arrow::field( |
| 255 | + "inner3", |
| 256 | + arrow::map(arrow::utf8(), |
| 257 | + arrow::field("inner_struct", arrow::struct_({key_field, value_field}), |
| 258 | + /*nullable=*/true)), |
| 259 | + /*nullable=*/false); |
| 260 | + |
| 261 | + auto schema = arrow::schema({inner_child1, inner_child2, inner_child3}); |
// inner1, case 1: the list stored under "outer_key" contains a null element;
// the expected error names the element field "value".
| 262 | + // test inner1 |
| 263 | + { |
| 264 | + std::shared_ptr<arrow::Array> array = |
| 265 | + arrow::ipc::internal::json::ArrayFromJSON( |
| 266 | + arrow::struct_({inner_child1, inner_child2, inner_child3}), R"([ |
| 267 | +[[["outer_key", [1, 2, 3, null]]], [["outer_key", [["key1", 1]]]], [["outer_key", [100, 200]]]] |
| 268 | +])") |
| 269 | + .ValueOrDie(); |
| 270 | + ASSERT_NOK_WITH_MSG( |
| 271 | + ArrowUtils::CheckNullabilityMatch(schema, array), |
| 272 | + "CheckNullabilityMatch failed, field value not nullable while data have null value"); |
| 273 | + } |
// inner1, case 2: the second row has a null list as the map value (declared
// nullable) and the third row has inner1 itself null; the expected error names
// "inner1".
| 274 | + { |
| 275 | + std::shared_ptr<arrow::Array> array = |
| 276 | + arrow::ipc::internal::json::ArrayFromJSON( |
| 277 | + arrow::struct_({inner_child1, inner_child2, inner_child3}), R"([ |
| 278 | +[[["outer_key", [1, 2, 3]]], [["outer_key", [["key1", 1]]]], [["outer_key", [100, 200]]]], |
| 279 | +[[["outer_key", null]], [["outer_key", [["key1", 1]]]], [["outer_key", [100, 200]]]], |
| 280 | +[null, [["outer_key", [["key1", 1]]]], [["outer_key", [100, 200]]]] |
| 281 | +])") |
| 282 | + .ValueOrDie(); |
| 283 | + ASSERT_NOK_WITH_MSG( |
| 284 | + ArrowUtils::CheckNullabilityMatch(schema, array), |
| 285 | + "CheckNullabilityMatch failed, field inner1 not nullable while data have null value"); |
| 286 | + } |
// inner2, case 1: the first row has a null nested map as the map value
// (declared nullable); the second row's nested map entry "key1" has a null
// value; the expected error names the field "value".
| 287 | + // test inner2 |
| 288 | + { |
| 289 | + std::shared_ptr<arrow::Array> array = |
| 290 | + arrow::ipc::internal::json::ArrayFromJSON( |
| 291 | + arrow::struct_({inner_child1, inner_child2, inner_child3}), R"([ |
| 292 | +[[["outer_key", [1, 2, 3]]], [["outer_key", null]], [["outer_key", [100, 200]]]], |
| 293 | +[[["outer_key", null]], [["outer_key", [["key1", null]]]], [["outer_key", [100, 200]]]] |
| 294 | +])") |
| 295 | + .ValueOrDie(); |
| 296 | + ASSERT_NOK_WITH_MSG( |
| 297 | + ArrowUtils::CheckNullabilityMatch(schema, array), |
| 298 | + "CheckNullabilityMatch failed, field value not nullable while data have null value"); |
| 299 | + } |
// inner2, case 2: the second row has inner2 itself null; the expected error
// names "inner2".
| 300 | + { |
| 301 | + std::shared_ptr<arrow::Array> array = |
| 302 | + arrow::ipc::internal::json::ArrayFromJSON( |
| 303 | + arrow::struct_({inner_child1, inner_child2, inner_child3}), R"([ |
| 304 | +[[["outer_key", [1, 2, 3]]], [["outer_key", null]], [["outer_key", [100, 200]]]], |
| 305 | +[[["outer_key", [1, 2, 3]]], null, [["outer_key", [100, 200]]]] |
| 306 | +])") |
| 307 | + .ValueOrDie(); |
| 308 | + ASSERT_NOK_WITH_MSG( |
| 309 | + ArrowUtils::CheckNullabilityMatch(schema, array), |
| 310 | + "CheckNullabilityMatch failed, field inner2 not nullable while data have null value"); |
| 311 | + } |
// inner3, case 1: the first row has a null struct as the map value (declared
// nullable); the second row's struct has a null "value" member; the expected
// error names the field "value".
| 312 | + // test inner3 |
| 313 | + { |
| 314 | + std::shared_ptr<arrow::Array> array = |
| 315 | + arrow::ipc::internal::json::ArrayFromJSON( |
| 316 | + arrow::struct_({inner_child1, inner_child2, inner_child3}), R"([ |
| 317 | +[[["outer_key", [1, 2, 3]]], [["outer_key", null]], [["outer_key", null]]], |
| 318 | +[[["outer_key", null]], [["outer_key", [["key1", 2]]]], [["outer_key", [100, null]]]] |
| 319 | +])") |
| 320 | + .ValueOrDie(); |
| 321 | + ASSERT_NOK_WITH_MSG( |
| 322 | + ArrowUtils::CheckNullabilityMatch(schema, array), |
| 323 | + "CheckNullabilityMatch failed, field value not nullable while data have null value"); |
| 324 | + } |
// inner3, case 2: the second row has inner3 itself null; the expected error
// names "inner3".
| 325 | + { |
| 326 | + std::shared_ptr<arrow::Array> array = |
| 327 | + arrow::ipc::internal::json::ArrayFromJSON( |
| 328 | + arrow::struct_({inner_child1, inner_child2, inner_child3}), R"([ |
| 329 | +[[["outer_key", [1, 2, 3]]], [["outer_key", null]], [["outer_key", null]]], |
| 330 | +[[["outer_key", null]], [["outer_key", [["key1", 2]]]], null] |
| 331 | +])") |
| 332 | + .ValueOrDie(); |
| 333 | + ASSERT_NOK_WITH_MSG( |
| 334 | + ArrowUtils::CheckNullabilityMatch(schema, array), |
| 335 | + "CheckNullabilityMatch failed, field inner3 not nullable while data have null value"); |
| 336 | + } |
| 337 | +} |
| 338 | + |
110 | 339 | } // namespace paimon::test |
0 commit comments