Skip to content

Commit c4b2569

Browse files
authored
Replace BooleanBufferBuilder with NullBufferBuilder in arrow-json if applicable (#9811)
# Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. --> - Closes #9781. # Rationale for this change <!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. --> # What changes are included in this PR? <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> Replace `BooleanBufferBuilder` with `NullBufferBuilder` so we get the benefit of late materialization # Are these changes tested? <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> Covered by existing tests # Are there any user-facing changes? <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. If there are any breaking changes to public APIs, please call them out. --> No
1 parent 04f207c commit c4b2569

3 files changed

Lines changed: 27 additions & 39 deletions

File tree

arrow-json/src/reader/list_array.rs

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,10 @@
1818
use std::marker::PhantomData;
1919
use std::sync::Arc;
2020

21-
use arrow_array::builder::BooleanBufferBuilder;
2221
use arrow_array::{
2322
ArrayRef, FixedSizeListArray, GenericListArray, GenericListViewArray, OffsetSizeTrait,
2423
};
25-
use arrow_buffer::buffer::NullBuffer;
26-
use arrow_buffer::{OffsetBuffer, ScalarBuffer};
24+
use arrow_buffer::{NullBufferBuilder, OffsetBuffer, ScalarBuffer};
2725
use arrow_schema::{ArrowError, DataType, FieldRef};
2826

2927
use crate::reader::tape::{Tape, TapeElement};
@@ -71,23 +69,21 @@ impl<O: OffsetSizeTrait, const IS_VIEW: bool> ArrayDecoder for ListLikeArrayDeco
7169
let mut offsets = Vec::with_capacity(pos.len() + 1);
7270
offsets.push(O::from_usize(0).unwrap());
7371

74-
let mut nulls = self
75-
.is_nullable
76-
.then(|| BooleanBufferBuilder::new(pos.len()));
72+
let mut nulls = self.is_nullable.then(|| NullBufferBuilder::new(pos.len()));
7773

7874
for p in pos {
7975
let end_idx = match (tape.get(*p), nulls.as_mut()) {
8076
(TapeElement::StartList(end_idx), None) => end_idx,
8177
(TapeElement::StartList(end_idx), Some(nulls)) => {
82-
nulls.append(true);
78+
nulls.append_non_null();
8379
end_idx
8480
}
8581
(TapeElement::Null, Some(nulls)) => {
86-
nulls.append(false);
82+
nulls.append_null();
8783
*p + 1
8884
}
8985
(_, Some(nulls)) if self.ignore_type_conflicts => {
90-
nulls.append(false);
86+
nulls.append_null();
9187
*p + 1
9288
}
9389
_ => return Err(tape.error(*p, "[")),
@@ -108,7 +104,7 @@ impl<O: OffsetSizeTrait, const IS_VIEW: bool> ArrayDecoder for ListLikeArrayDeco
108104
}
109105

110106
let values = self.decoder.decode(tape, &child_pos)?;
111-
let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));
107+
let nulls = nulls.as_mut().and_then(|x| x.finish());
112108

113109
if IS_VIEW {
114110
let mut sizes = Vec::with_capacity(offsets.len() - 1);
@@ -172,24 +168,22 @@ impl ArrayDecoder for FixedSizeListArrayDecoder {
172168
let expected = self.size as usize;
173169
let mut child_pos = Vec::with_capacity(pos.len() * expected);
174170

175-
let mut nulls = self
176-
.is_nullable
177-
.then(|| BooleanBufferBuilder::new(pos.len()));
171+
let mut nulls = self.is_nullable.then(|| NullBufferBuilder::new(pos.len()));
178172

179173
for p in pos {
180174
let end_idx = match (tape.get(*p), nulls.as_mut()) {
181175
(TapeElement::StartList(end_idx), None) => end_idx,
182176
(TapeElement::StartList(end_idx), Some(nulls)) => {
183-
nulls.append(true);
177+
nulls.append_non_null();
184178
end_idx
185179
}
186180
(TapeElement::Null, Some(nulls)) => {
187-
nulls.append(false);
181+
nulls.append_null();
188182
child_pos.resize(child_pos.len() + expected, 0);
189183
continue;
190184
}
191185
(_, Some(nulls)) if self.ignore_type_conflicts => {
192-
nulls.append(false);
186+
nulls.append_null();
193187
child_pos.resize(child_pos.len() + expected, 0);
194188
continue;
195189
}
@@ -213,7 +207,7 @@ impl ArrayDecoder for FixedSizeListArrayDecoder {
213207
}
214208

215209
let values = self.decoder.decode(tape, &child_pos)?;
216-
let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));
210+
let nulls = nulls.as_mut().and_then(|x| x.finish());
217211

218212
let array = FixedSizeListArray::try_new(self.field.clone(), self.size, values, nulls)?;
219213
Ok(Arc::new(array))

arrow-json/src/reader/map_array.rs

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,9 @@
1717

1818
use std::sync::Arc;
1919

20-
use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
20+
use arrow_array::builder::BufferBuilder;
2121
use arrow_array::{ArrayRef, MapArray, StructArray};
22-
use arrow_buffer::buffer::NullBuffer;
23-
use arrow_buffer::{ArrowNativeType, OffsetBuffer, ScalarBuffer};
22+
use arrow_buffer::{ArrowNativeType, NullBufferBuilder, OffsetBuffer, ScalarBuffer};
2423
use arrow_schema::{ArrowError, DataType, FieldRef, Fields};
2524

2625
use crate::reader::tape::{Tape, TapeElement};
@@ -90,23 +89,21 @@ impl ArrayDecoder for MapArrayDecoder {
9089
let mut key_pos = Vec::with_capacity(pos.len());
9190
let mut value_pos = Vec::with_capacity(pos.len());
9291

93-
let mut nulls = self
94-
.is_nullable
95-
.then(|| BooleanBufferBuilder::new(pos.len()));
92+
let mut nulls = self.is_nullable.then(|| NullBufferBuilder::new(pos.len()));
9693

9794
for p in pos.iter().copied() {
9895
let end_idx = match (tape.get(p), nulls.as_mut()) {
9996
(TapeElement::StartObject(end_idx), None) => end_idx,
10097
(TapeElement::StartObject(end_idx), Some(nulls)) => {
101-
nulls.append(true);
98+
nulls.append_non_null();
10299
end_idx
103100
}
104101
(TapeElement::Null, Some(nulls)) => {
105-
nulls.append(false);
102+
nulls.append_null();
106103
p + 1
107104
}
108105
(_, Some(nulls)) if self.ignore_type_conflicts => {
109-
nulls.append(false);
106+
nulls.append_null();
110107
p + 1
111108
}
112109
_ => return Err(tape.error(p, "{")),
@@ -143,7 +140,7 @@ impl ArrayDecoder for MapArrayDecoder {
143140
)
144141
};
145142

146-
let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));
143+
let nulls = nulls.as_mut().and_then(|x| x.finish());
147144
// SAFETY: offsets are built monotonically starting from 0
148145
let offsets = unsafe { OffsetBuffer::new_unchecked(ScalarBuffer::from(offsets.finish())) };
149146

arrow-json/src/reader/struct_array.rs

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@
1818
use std::collections::HashMap;
1919
use std::sync::Arc;
2020

21-
use arrow_array::builder::BooleanBufferBuilder;
2221
use arrow_array::{Array, ArrayRef, StructArray};
23-
use arrow_buffer::buffer::NullBuffer;
22+
use arrow_buffer::NullBufferBuilder;
2423
use arrow_schema::{ArrowError, DataType, Fields};
2524

2625
use crate::reader::tape::{Tape, TapeElement};
@@ -126,9 +125,7 @@ impl ArrayDecoder for StructArrayDecoder {
126125
let row_count = pos.len();
127126
let field_count = fields.len();
128127
self.field_tape_positions.resize(field_count, row_count)?;
129-
let mut nulls = self
130-
.is_nullable
131-
.then(|| BooleanBufferBuilder::new(pos.len()));
128+
let mut nulls = self.is_nullable.then(|| NullBufferBuilder::new(pos.len()));
132129

133130
{
134131
// We avoid having the match on self.struct_mode inside the hot loop for performance
@@ -139,15 +136,15 @@ impl ArrayDecoder for StructArrayDecoder {
139136
let end_idx = match (tape.get(*p), nulls.as_mut()) {
140137
(TapeElement::StartObject(end_idx), None) => end_idx,
141138
(TapeElement::StartObject(end_idx), Some(nulls)) => {
142-
nulls.append(true);
139+
nulls.append_non_null();
143140
end_idx
144141
}
145142
(TapeElement::Null, Some(nulls)) => {
146-
nulls.append(false);
143+
nulls.append_null();
147144
continue;
148145
}
149146
(_, Some(nulls)) if self.ignore_type_conflicts => {
150-
nulls.append(false);
147+
nulls.append_null();
151148
continue;
152149
}
153150
(_, _) => return Err(tape.error(*p, "{")),
@@ -188,15 +185,15 @@ impl ArrayDecoder for StructArrayDecoder {
188185
let end_idx = match (tape.get(*p), nulls.as_mut()) {
189186
(TapeElement::StartList(end_idx), None) => end_idx,
190187
(TapeElement::StartList(end_idx), Some(nulls)) => {
191-
nulls.append(true);
188+
nulls.append_non_null();
192189
end_idx
193190
}
194191
(TapeElement::Null, Some(nulls)) => {
195-
nulls.append(false);
192+
nulls.append_null();
196193
continue;
197194
}
198195
(_, Some(nulls)) if self.ignore_type_conflicts => {
199-
nulls.append(false);
196+
nulls.append_null();
200197
continue;
201198
}
202199
(_, _) => return Err(tape.error(*p, "[")),
@@ -245,7 +242,7 @@ impl ArrayDecoder for StructArrayDecoder {
245242
})
246243
.collect::<Result<Vec<_>, ArrowError>>()?;
247244

248-
let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));
245+
let nulls = nulls.as_mut().and_then(|x| x.finish());
249246

250247
for (c, f) in child_arrays.iter().zip(fields) {
251248
// Sanity check

0 commit comments

Comments
 (0)