From 1081e7afe510794fc9eb9db35ca25c9bb0dece65 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Thu, 7 May 2026 07:50:36 -0700 Subject: [PATCH 1/3] remove bool_val from FieldIdentifier --- parquet/src/file/metadata/thrift/mod.rs | 10 +--- parquet/src/parquet_macros.rs | 5 +- parquet/src/parquet_thrift.rs | 63 +++++++++++-------------- 3 files changed, 31 insertions(+), 47 deletions(-) diff --git a/parquet/src/file/metadata/thrift/mod.rs b/parquet/src/file/metadata/thrift/mod.rs index 75fcad871e2f..9be697c0fe43 100644 --- a/parquet/src/file/metadata/thrift/mod.rs +++ b/parquet/src/file/metadata/thrift/mod.rs @@ -1135,13 +1135,7 @@ impl DataPageHeaderV2 { repetition_levels_byte_length = Some(val); } 7 => { - if field_ident.bool_val.is_none() { - return Err(general_err!( - "Expected bool field but got thrift type {:?}", - field_ident.field_type - )); - } - is_compressed = field_ident.bool_val; + is_compressed = Some(field_ident.bool_val()?); } _ => { prot.skip(field_ident.field_type)?; @@ -1837,7 +1831,7 @@ pub(crate) mod tests { fn assert_malformed_bool_error(err: crate::errors::ParquetError) { let msg = err.to_string(); assert!( - msg.contains("Expected bool field"), + msg.contains("Unexpected struct field type"), "unexpected error message: {msg}" ); } diff --git a/parquet/src/parquet_macros.rs b/parquet/src/parquet_macros.rs index 13c476343164..8bb2ad23b054 100644 --- a/parquet/src/parquet_macros.rs +++ b/parquet/src/parquet_macros.rs @@ -464,10 +464,7 @@ macro_rules! __thrift_read_field { $crate::parquet_thrift::OrderedF64::read_thrift(&mut *$prot)? }; ($prot:tt, $field_ident:tt, bool) => { - $field_ident.bool_val.ok_or_else(|| general_err!( - "Expected bool field but got thrift type {:?}", - $field_ident.field_type - ))? + $field_ident.bool_val()? }; ($prot:tt, $field_ident:tt, $field_type:ident) => { $field_type::read_thrift(&mut *$prot)? diff --git a/parquet/src/parquet_thrift.rs b/parquet/src/parquet_thrift.rs index b9d69b9d52e8..e04e532677a5 100644 --- a/parquet/src/parquet_thrift.rs +++ b/parquet/src/parquet_thrift.rs @@ -59,9 +59,10 @@ impl From for ParquetError { match e { ThriftProtocolError::Eof => eof_err!("Unexpected EOF"), ThriftProtocolError::IO(e) => e.into(), - ThriftProtocolError::InvalidFieldType(value) => { - general_err!("Unexpected struct field type {}", value) - } + ThriftProtocolError::InvalidFieldType(value) => match FieldType::try_from(value) { + Ok(fld_type) => general_err!("Unexpected struct field type {:?}", fld_type), + Err(_) => general_err!("Unexpected struct field type {}", value), + }, ThriftProtocolError::InvalidElementType(value) => { general_err!("Unexpected list/set element type {}", value) } @@ -247,11 +248,16 @@ pub(crate) struct FieldIdentifier { pub(crate) field_type: FieldType, /// The field's `id`. May be computed from delta or directly decoded. pub(crate) id: i16, - /// Stores the value for booleans. - /// - /// Boolean fields store no data, instead the field type is either boolean true, or - /// boolean false. - pub(crate) bool_val: Option, +} + +impl FieldIdentifier { + pub(crate) fn bool_val(&self) -> ThriftProtocolResult { + match self.field_type { + FieldType::BooleanTrue => Ok(true), + FieldType::BooleanFalse => Ok(false), + _ => Err(ThriftProtocolError::InvalidFieldType(self.field_type as u8)), + } + } } /// Struct used to describe a [thrift list]. @@ -360,37 +366,24 @@ pub(crate) trait ThriftCompactInputProtocol<'a> { let field_type = self.read_byte()?; let field_delta = (field_type & 0xf0) >> 4; let field_type = FieldType::try_from(field_type & 0xf)?; - let mut bool_val: Option = None; - match field_type { - FieldType::Stop => Ok(FieldIdentifier { + match (field_type, field_delta) { + (FieldType::Stop, _) => Ok(FieldIdentifier { field_type: FieldType::Stop, id: 0, - bool_val, }), - _ => { - // special handling for bools - if field_type == FieldType::BooleanFalse { - bool_val = Some(false); - } else if field_type == FieldType::BooleanTrue { - bool_val = Some(true); - } - let field_id = if field_delta != 0 { - last_field_id.checked_add(field_delta as i16).ok_or( - ThriftProtocolError::FieldDeltaOverflow { - field_delta, - last_field_id, - }, - )? - } else { - self.read_full_field_id()? - }; - - Ok(FieldIdentifier { - field_type, - id: field_id, - bool_val, - }) + (field_type, 0) => { + let id = self.read_full_field_id()?; + Ok(FieldIdentifier { field_type, id }) + } + (field_type, field_delta) => { + let id = last_field_id.checked_add(field_delta as i16).ok_or( + ThriftProtocolError::FieldDeltaOverflow { + field_delta, + last_field_id, + }, + )?; + Ok(FieldIdentifier { field_type, id }) } } } From 00326ea4dc02309428ac2cdfb82a9ac7d0c29303 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Mon, 11 May 2026 15:31:16 -0700 Subject: [PATCH 2/3] revert to original branching --- parquet/src/parquet_thrift.rs | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/parquet/src/parquet_thrift.rs b/parquet/src/parquet_thrift.rs index e04e532677a5..018ec702e44d 100644 --- a/parquet/src/parquet_thrift.rs +++ b/parquet/src/parquet_thrift.rs @@ -367,22 +367,23 @@ pub(crate) trait ThriftCompactInputProtocol<'a> { let field_delta = (field_type & 0xf0) >> 4; let field_type = FieldType::try_from(field_type & 0xf)?; - match (field_type, field_delta) { - (FieldType::Stop, _) => Ok(FieldIdentifier { + match field_type { + FieldType::Stop => Ok(FieldIdentifier { field_type: FieldType::Stop, id: 0, }), - (field_type, 0) => { - let id = self.read_full_field_id()?; - Ok(FieldIdentifier { field_type, id }) - } - (field_type, field_delta) => { - let id = last_field_id.checked_add(field_delta as i16).ok_or( - ThriftProtocolError::FieldDeltaOverflow { - field_delta, - last_field_id, - }, - )?; + _ => { + let id = if field_delta != 0 { + last_field_id.checked_add(field_delta as i16).ok_or( + ThriftProtocolError::FieldDeltaOverflow { + field_delta, + last_field_id, + }, + )? + } else { + self.read_full_field_id()? + }; + Ok(FieldIdentifier { field_type, id }) } } From 99de9817d9dcaa63c6f4842326fe6ab4b8f6f972 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Mon, 11 May 2026 19:02:51 -0700 Subject: [PATCH 3/3] try earlier exit for stop --- parquet/src/parquet_thrift.rs | 39 +++++++++++++++++------------------ 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/parquet/src/parquet_thrift.rs b/parquet/src/parquet_thrift.rs index 018ec702e44d..7567ace08bab 100644 --- a/parquet/src/parquet_thrift.rs +++ b/parquet/src/parquet_thrift.rs @@ -364,29 +364,28 @@ pub(crate) trait ThriftCompactInputProtocol<'a> { // - the type // - the field delta and the type let field_type = self.read_byte()?; - let field_delta = (field_type & 0xf0) >> 4; - let field_type = FieldType::try_from(field_type & 0xf)?; - - match field_type { - FieldType::Stop => Ok(FieldIdentifier { + if field_type & 0xf == 0 { + return Ok(FieldIdentifier { field_type: FieldType::Stop, id: 0, - }), - _ => { - let id = if field_delta != 0 { - last_field_id.checked_add(field_delta as i16).ok_or( - ThriftProtocolError::FieldDeltaOverflow { - field_delta, - last_field_id, - }, - )? - } else { - self.read_full_field_id()? - }; - - Ok(FieldIdentifier { field_type, id }) - } + }); } + + let field_delta = (field_type & 0xf0) >> 4; + let field_type = FieldType::try_from(field_type & 0xf)?; + + let id = if field_delta != 0 { + last_field_id.checked_add(field_delta as i16).ok_or( + ThriftProtocolError::FieldDeltaOverflow { + field_delta, + last_field_id, + }, + )? + } else { + self.read_full_field_id()? + }; + + Ok(FieldIdentifier { field_type, id }) } /// This is a specialized version of [`Self::read_field_begin`], solely for use in parsing