Skip to content

Commit 30185d6

Browse files
authored
perf: Remove bool_val from Parquet Thrift FieldIdentifier (#9945)
# Which issue does this PR close? - Closes #9946. # Rationale for this change Remove some unnecessary branching from a hot path in the Thrift parser, improving performance and making for easier to read code. # What changes are included in this PR? Removes the `bool_val` field from `FieldIdentifier`. # Are these changes tested? Covered by existing tests # Are there any user-facing changes? No, this is an internal-only API
1 parent 2e8e0c7 commit 30185d6

3 files changed

Lines changed: 36 additions & 52 deletions

File tree

parquet/src/file/metadata/thrift/mod.rs

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1135,13 +1135,7 @@ impl DataPageHeaderV2 {
11351135
repetition_levels_byte_length = Some(val);
11361136
}
11371137
7 => {
1138-
if field_ident.bool_val.is_none() {
1139-
return Err(general_err!(
1140-
"Expected bool field but got thrift type {:?}",
1141-
field_ident.field_type
1142-
));
1143-
}
1144-
is_compressed = field_ident.bool_val;
1138+
is_compressed = Some(field_ident.bool_val()?);
11451139
}
11461140
_ => {
11471141
prot.skip(field_ident.field_type)?;
@@ -1837,7 +1831,7 @@ pub(crate) mod tests {
18371831
fn assert_malformed_bool_error(err: crate::errors::ParquetError) {
18381832
let msg = err.to_string();
18391833
assert!(
1840-
msg.contains("Expected bool field"),
1834+
msg.contains("Unexpected struct field type"),
18411835
"unexpected error message: {msg}"
18421836
);
18431837
}

parquet/src/parquet_macros.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -464,10 +464,7 @@ macro_rules! __thrift_read_field {
464464
$crate::parquet_thrift::OrderedF64::read_thrift(&mut *$prot)?
465465
};
466466
($prot:tt, $field_ident:tt, bool) => {
467-
$field_ident.bool_val.ok_or_else(|| general_err!(
468-
"Expected bool field but got thrift type {:?}",
469-
$field_ident.field_type
470-
))?
467+
$field_ident.bool_val()?
471468
};
472469
($prot:tt, $field_ident:tt, $field_type:ident) => {
473470
$field_type::read_thrift(&mut *$prot)?

parquet/src/parquet_thrift.rs

Lines changed: 33 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,10 @@ impl From<ThriftProtocolError> for ParquetError {
5959
match e {
6060
ThriftProtocolError::Eof => eof_err!("Unexpected EOF"),
6161
ThriftProtocolError::IO(e) => e.into(),
62-
ThriftProtocolError::InvalidFieldType(value) => {
63-
general_err!("Unexpected struct field type {}", value)
64-
}
62+
ThriftProtocolError::InvalidFieldType(value) => match FieldType::try_from(value) {
63+
Ok(fld_type) => general_err!("Unexpected struct field type {:?}", fld_type),
64+
Err(_) => general_err!("Unexpected struct field type {}", value),
65+
},
6566
ThriftProtocolError::InvalidElementType(value) => {
6667
general_err!("Unexpected list/set element type {}", value)
6768
}
@@ -247,11 +248,16 @@ pub(crate) struct FieldIdentifier {
247248
pub(crate) field_type: FieldType,
248249
/// The field's `id`. May be computed from delta or directly decoded.
249250
pub(crate) id: i16,
250-
/// Stores the value for booleans.
251-
///
252-
/// Boolean fields store no data, instead the field type is either boolean true, or
253-
/// boolean false.
254-
pub(crate) bool_val: Option<bool>,
251+
}
252+
253+
impl FieldIdentifier {
254+
pub(crate) fn bool_val(&self) -> ThriftProtocolResult<bool> {
255+
match self.field_type {
256+
FieldType::BooleanTrue => Ok(true),
257+
FieldType::BooleanFalse => Ok(false),
258+
_ => Err(ThriftProtocolError::InvalidFieldType(self.field_type as u8)),
259+
}
260+
}
255261
}
256262

257263
/// Struct used to describe a [thrift list].
@@ -358,41 +364,28 @@ pub(crate) trait ThriftCompactInputProtocol<'a> {
358364
// - the type
359365
// - the field delta and the type
360366
let field_type = self.read_byte()?;
361-
let field_delta = (field_type & 0xf0) >> 4;
362-
let field_type = FieldType::try_from(field_type & 0xf)?;
363-
let mut bool_val: Option<bool> = None;
364-
365-
match field_type {
366-
FieldType::Stop => Ok(FieldIdentifier {
367+
if field_type & 0xf == 0 {
368+
return Ok(FieldIdentifier {
367369
field_type: FieldType::Stop,
368370
id: 0,
369-
bool_val,
370-
}),
371-
_ => {
372-
// special handling for bools
373-
if field_type == FieldType::BooleanFalse {
374-
bool_val = Some(false);
375-
} else if field_type == FieldType::BooleanTrue {
376-
bool_val = Some(true);
377-
}
378-
let field_id = if field_delta != 0 {
379-
last_field_id.checked_add(field_delta as i16).ok_or(
380-
ThriftProtocolError::FieldDeltaOverflow {
381-
field_delta,
382-
last_field_id,
383-
},
384-
)?
385-
} else {
386-
self.read_full_field_id()?
387-
};
388-
389-
Ok(FieldIdentifier {
390-
field_type,
391-
id: field_id,
392-
bool_val,
393-
})
394-
}
371+
});
395372
}
373+
374+
let field_delta = (field_type & 0xf0) >> 4;
375+
let field_type = FieldType::try_from(field_type & 0xf)?;
376+
377+
let id = if field_delta != 0 {
378+
last_field_id.checked_add(field_delta as i16).ok_or(
379+
ThriftProtocolError::FieldDeltaOverflow {
380+
field_delta,
381+
last_field_id,
382+
},
383+
)?
384+
} else {
385+
self.read_full_field_id()?
386+
};
387+
388+
Ok(FieldIdentifier { field_type, id })
396389
}
397390

398391
/// This is a specialized version of [`Self::read_field_begin`], solely for use in parsing

0 commit comments

Comments
 (0)