Skip to content

Commit d2b4eba

Browse files
yingsu00Lakehouse Engine Bot
authored andcommitted
fix: Avoid allocating null buffer when partitioning null-free vectors
PartitionedFlatVector::partition() and PartitionedRowVector::partition() called mutableRawNulls() unconditionally. mutableRawNulls() allocates a null buffer if one does not exist, causing mayHaveNulls() to return true for every vector after partitioning, even when the original had no nulls. Fix both sites to check rawNulls() first and only call mutableRawNulls() when a null buffer already exists. Add noNullBufferAllocatedForNullFreeFlat and noNullBufferAllocatedForNullFreeRow tests to PartitionedVectorTest to cover this case. # Conflicts: # velox/vector/PartitionedVector.cpp Alchemy-item: (ID = 1179) Optimized PartitionedOutput staging hub commit 3/3 - 2706c1e
1 parent 70ae50d commit d2b4eba

2 files changed

Lines changed: 56 additions & 7 deletions

File tree

velox/vector/PartitionedVector.cpp

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -346,8 +346,8 @@ template <typename T>
346346
void PartitionedFlatVector<T>::partition(
347347
const std::vector<uint32_t>& partitions,
348348
PartitionBuildContext& ctx) {
349-
Byte* rawNulls = reinterpret_cast<Byte*>(vector_->mutableRawNulls());
350-
if (rawNulls) {
349+
if (vector_->rawNulls()) {
350+
Byte* rawNulls = reinterpret_cast<Byte*>(vector_->mutableRawNulls());
351351
partitionBitsInPlace(
352352
rawNulls, partitions, numPartitions_, ctx, endPartitionOffsets_, pool_);
353353
}
@@ -391,17 +391,15 @@ void PartitionedRowVector::partition(
391391
pool_));
392392
}
393393

394-
if (numPartitions_ > 1) {
394+
if (numPartitions_ > 1 && vector_->rawNulls()) {
395395
Byte* rawNulls = reinterpret_cast<Byte*>(vector_->mutableRawNulls());
396-
if (rawNulls) {
397-
partitionBitsInPlace(
398-
rawNulls,
396+
partitionBitsInPlace(
397+
rawNulls,
399398
partitions,
400399
numPartitions_,
401400
ctx,
402401
endPartitionOffsets_,
403402
pool_);
404-
}
405403
}
406404
}
407405

velox/vector/tests/PartitionedVectorTest.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,57 @@ TEST_P(PartitioningVectorTest, testRowVector) {
217217
}));
218218
}
219219

220+
// Partitioning a null-free vector must not allocate a null buffer.
221+
TEST_P(PartitioningVectorTest, noNullBufferAllocatedForNullFreeFlat) {
222+
const int numValues = GetParam();
223+
if (numValues == 0) {
224+
return;
225+
}
226+
227+
auto flat = makeFlatVector<int32_t>(numValues, [](auto row) { return row; });
228+
ASSERT_FALSE(flat->mayHaveNulls());
229+
230+
std::vector<uint32_t> partitions(numValues);
231+
for (int i = 0; i < numValues; ++i) {
232+
partitions[i] = i % 2;
233+
}
234+
235+
auto pv = PartitionedVector::create(flat, partitions, 2, ctx_, pool_.get());
236+
EXPECT_FALSE(pv->baseVector()->mayHaveNulls())
237+
<< "partition() must not allocate a null buffer for a null-free FlatVector";
238+
}
239+
240+
// Partitioning a null-free RowVector must not allocate null buffers on the
241+
// row vector or any of its children.
242+
TEST_P(PartitioningVectorTest, noNullBufferAllocatedForNullFreeRow) {
243+
const int numValues = GetParam();
244+
if (numValues == 0) {
245+
return;
246+
}
247+
248+
auto row = makeRowVector({
249+
makeFlatVector<int32_t>(numValues, [](auto row) { return row; }),
250+
makeFlatVector<int64_t>(numValues, [](auto row) { return row * 10; }),
251+
});
252+
ASSERT_FALSE(row->mayHaveNulls());
253+
ASSERT_FALSE(row->childAt(0)->mayHaveNulls());
254+
ASSERT_FALSE(row->childAt(1)->mayHaveNulls());
255+
256+
std::vector<uint32_t> partitions(numValues);
257+
for (int i = 0; i < numValues; ++i) {
258+
partitions[i] = i % 2;
259+
}
260+
261+
auto pv = PartitionedVector::create(row, partitions, 2, ctx_, pool_.get());
262+
auto* base = pv->baseVector()->as<RowVector>();
263+
EXPECT_FALSE(base->mayHaveNulls())
264+
<< "partition() must not allocate a null buffer for a null-free RowVector";
265+
EXPECT_FALSE(base->childAt(0)->mayHaveNulls())
266+
<< "partition() must not allocate a null buffer for null-free child 0";
267+
EXPECT_FALSE(base->childAt(1)->mayHaveNulls())
268+
<< "partition() must not allocate a null buffer for null-free child 1";
269+
}
270+
220271
// Test with different vector sizes, including edge cases like 0 and 1.
221272
INSTANTIATE_TEST_SUITE_P(
222273
FlatVectorSizes,

0 commit comments

Comments
 (0)