Skip to content

Commit 9e3d1f3

Browse files
authored
[refine](column) replace field-based nested default with column-level operation in ColumnVariant (#62547)
`ColumnVariant::try_insert_default_from_nested` was filling missing nested subcolumn rows by converting the last row of a sibling leaf column to a `Field`, recursively walking the `Field` tree via `FieldVisitorReplaceScalars` to replace scalar values with the type default, and then inserting the resulting `Field` back into the column. This approach was unnecessarily expensive (column → Field → Field tree traversal → column) and inconsistent with the batch counterpart `try_insert_many_defaults_from_nested`, which operates entirely at the column level using `cut` + `clone_with_default_values`. The scalar visitor (`FieldVisitorReplaceScalars`) existed as duplicate dead code in both `column_variant.cpp` and `variant_util.cpp`. ### Changes - **`try_insert_default_from_nested`**: rewrite to use the same column-level approach as `try_insert_many_defaults_from_nested` — `leaf->data.cut(leaf_size - 1, 1).clone_with_default_values(field_info)` — eliminating the Field round-trip and the dependency on `get_default()` / `FieldVisitorReplaceScalars`. - **Remove `FieldVisitorReplaceScalars`** from both `column_variant.cpp` and `variant_util.cpp`; neither file has any remaining callers. - **Remove `field_visitor` unit test** in `column_variant_test.cpp` that directly tested the now-deleted visitor class.
1 parent 2bc4487 commit 9e3d1f3

File tree

3 files changed

+15
-99
lines changed

3 files changed

+15
-99
lines changed

be/src/core/column/column_variant.cpp

Lines changed: 15 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -2488,64 +2488,33 @@ bool ColumnVariant::try_insert_many_defaults_from_nested(const Subcolumns::NodeP
24882488
return true;
24892489
}
24902490

2491-
/// Visitor that keeps @num_dimensions_to_keep dimensions in arrays
2492-
/// and replaces all scalars or nested arrays to @replacement at that level.
2493-
class FieldVisitorReplaceScalars : public StaticVisitor<Field> {
2494-
public:
2495-
FieldVisitorReplaceScalars(const Field& replacement_, size_t num_dimensions_to_keep_)
2496-
: replacement(replacement_), num_dimensions_to_keep(num_dimensions_to_keep_) {}
2497-
2498-
template <PrimitiveType T>
2499-
Field apply(const typename PrimitiveTypeTraits<T>::CppType& x) const {
2500-
if constexpr (T == TYPE_ARRAY) {
2501-
if (num_dimensions_to_keep == 0) {
2502-
return replacement;
2503-
}
2504-
2505-
const size_t size = x.size();
2506-
Array res(size);
2507-
for (size_t i = 0; i < size; ++i) {
2508-
res[i] = apply_visitor(
2509-
FieldVisitorReplaceScalars(replacement, num_dimensions_to_keep - 1), x[i]);
2510-
}
2511-
return Field::create_field<TYPE_ARRAY>(res);
2512-
} else {
2513-
return replacement;
2514-
}
2515-
}
2516-
2517-
private:
2518-
const Field& replacement;
2519-
size_t num_dimensions_to_keep;
2520-
};
2521-
25222491
bool ColumnVariant::try_insert_default_from_nested(const Subcolumns::NodePtr& entry) const {
25232492
const auto* leaf = get_leaf_of_the_same_nested(entry);
25242493
if (!leaf) {
25252494
return false;
25262495
}
25272496

2528-
auto last_field = leaf->data.get_last_field();
2529-
if (last_field.is_null()) {
2497+
size_t leaf_size = leaf->data.size();
2498+
if (leaf_size == 0) {
25302499
return false;
25312500
}
25322501

2533-
size_t leaf_num_dimensions = leaf->data.get_dimensions();
2534-
size_t entry_num_dimensions = entry->data.get_dimensions();
2535-
2536-
if (entry_num_dimensions > leaf_num_dimensions) {
2537-
throw doris::Exception(
2538-
ErrorCode::INVALID_ARGUMENT,
2539-
"entry_num_dimensions > leaf_num_dimensions, entry_num_dimensions={}, "
2540-
"leaf_num_dimensions={}",
2541-
entry_num_dimensions, leaf_num_dimensions);
2502+
// Preserve the original null guard: if the donor leaf's last row is NULL,
2503+
// fall back to insert_default() so that a missing sibling stays NULL too.
2504+
if (leaf->data.is_null_at(leaf_size - 1)) {
2505+
return false;
25422506
}
25432507

2544-
auto default_scalar = entry->data.get_least_common_type()->get_default();
2508+
FieldInfo field_info = {
2509+
.scalar_type_id = entry->data.least_common_type.get_base_type_id(),
2510+
.num_dimensions = entry->data.get_dimensions(),
2511+
};
25452512

2546-
auto default_field = apply_visitor(
2547-
FieldVisitorReplaceScalars(default_scalar, leaf_num_dimensions), last_field);
2548-
entry->data.insert(std::move(default_field));
2513+
// Reuse the same column-level approach as try_insert_many_defaults_from_nested:
2514+
// cut the last row from leaf, replace scalar values with defaults, keep array structure.
2515+
auto new_subcolumn = leaf->data.cut(leaf_size - 1, 1).clone_with_default_values(field_info);
2516+
entry->data.insert_range_from(new_subcolumn, 0, 1);
2517+
ENABLE_CHECK_CONSISTENCY(this);
25492518
return true;
25502519
}
25512520

be/src/exec/common/variant_util.cpp

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1821,35 +1821,6 @@ static void append_field_to_binary_chars(const Field& field, ColumnString::Chars
18211821
field.get_type());
18221822
}
18231823
}
1824-
/// Visitor that keeps @num_dimensions_to_keep dimensions in arrays
1825-
/// and replaces all scalars or nested arrays to @replacement at that level.
1826-
class FieldVisitorReplaceScalars : public StaticVisitor<Field> {
1827-
public:
1828-
FieldVisitorReplaceScalars(const Field& replacement_, size_t num_dimensions_to_keep_)
1829-
: replacement(replacement_), num_dimensions_to_keep(num_dimensions_to_keep_) {}
1830-
template <PrimitiveType T>
1831-
Field operator()(const typename PrimitiveTypeTraits<T>::CppType& x) const {
1832-
if constexpr (T == TYPE_ARRAY) {
1833-
if (num_dimensions_to_keep == 0) {
1834-
return replacement;
1835-
}
1836-
const size_t size = x.size();
1837-
Array res(size);
1838-
for (size_t i = 0; i < size; ++i) {
1839-
res[i] = apply_visitor(
1840-
FieldVisitorReplaceScalars(replacement, num_dimensions_to_keep - 1), x[i]);
1841-
}
1842-
return Field::create_field<TYPE_ARRAY>(res);
1843-
} else {
1844-
return replacement;
1845-
}
1846-
}
1847-
1848-
private:
1849-
const Field& replacement;
1850-
size_t num_dimensions_to_keep;
1851-
};
1852-
18531824
template <typename ParserImpl>
18541825
void parse_json_to_variant_impl(IColumn& column, const char* src, size_t length,
18551826
JSONDataParser<ParserImpl>* parser, const ParseConfig& config) {

be/test/core/column/column_variant_test.cpp

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3082,30 +3082,6 @@ TEST_F(ColumnVariantTest, get_field_info_all_types) {
30823082
}
30833083
}
30843084

3085-
TEST_F(ColumnVariantTest, field_visitor) {
3086-
// Test replacing scalar values in a flat array
3087-
{
3088-
Array array;
3089-
array.push_back(Field::create_field<TYPE_BIGINT>(Int64(1)));
3090-
array.push_back(Field::create_field<TYPE_BIGINT>(Int64(2)));
3091-
array.push_back(Field::create_field<TYPE_BIGINT>(Int64(3)));
3092-
3093-
Field field = Field::create_field<TYPE_ARRAY>(std::move(array));
3094-
Field replacement = Field::create_field<TYPE_BIGINT>(Int64(42));
3095-
Field result = apply_visitor(FieldVisitorReplaceScalars(replacement, 0), field);
3096-
3097-
EXPECT_EQ(result.get<TYPE_BIGINT>(), 42);
3098-
3099-
Field replacement1 = Field::create_field<TYPE_BIGINT>(Int64(42));
3100-
Field result1 = apply_visitor(FieldVisitorReplaceScalars(replacement, 1), field);
3101-
3102-
EXPECT_EQ(result1.get<TYPE_ARRAY>().size(), 3);
3103-
EXPECT_EQ(result1.get<TYPE_ARRAY>()[0].get<TYPE_BIGINT>(), 42);
3104-
EXPECT_EQ(result1.get<TYPE_ARRAY>()[1].get<TYPE_BIGINT>(), 42);
3105-
EXPECT_EQ(result1.get<TYPE_ARRAY>()[2].get<TYPE_BIGINT>(), 42);
3106-
}
3107-
}
3108-
31093085
TEST_F(ColumnVariantTest, subcolumn_operations_coverage) {
31103086
// Test insert_range_from
31113087
{

0 commit comments

Comments
 (0)