@@ -153,11 +153,18 @@ pub(crate) fn core_storage_without_typed_value(
153153 . map ( IntoArray :: into_array)
154154}
155155
156+ /// Converts a Parquet `typed_value` tree into the storage-agnostic canonical shredded tree.
157+ ///
158+ /// Parquet shredding represents nested fields with wrapper structs containing `value` and/or
159+ /// `typed_value`. This strips those wrappers, preserves list/struct shape, and leaves primitive
160+ /// typed values unchanged.
156161pub ( crate ) fn logical_shredded_from_parquet_typed_value (
157162 metadata : & ArrayRef ,
158163 typed_value : ArrayRef ,
159164) -> VortexResult < ArrayRef > {
160165 if let Some ( list_array) = typed_value. as_opt :: < List > ( ) {
166+ // Lists keep their original offsets and validity; only the physical element
167+ // representation may need wrapper removal.
161168 let elements =
162169 logical_shredded_from_parquet_field ( metadata, list_array. elements ( ) . clone ( ) ) ?
163170 . unwrap_or_else ( || list_array. elements ( ) . clone ( ) ) ;
@@ -173,6 +180,8 @@ pub(crate) fn logical_shredded_from_parquet_typed_value(
173180 return Ok ( typed_value) ;
174181 } ;
175182
183+ // For object shredding, each struct field is a logical object field. Fields that
184+ // are known wrapper shells without typed data are omitted from the canonical tree.
176185 let mut names = Vec :: new ( ) ;
177186 let mut fields = Vec :: new ( ) ;
178187 for ( name, field) in struct_array
@@ -195,6 +204,10 @@ pub(crate) fn logical_shredded_from_parquet_typed_value(
195204 . into_array ( ) )
196205}
197206
207+ /// Converts one Parquet shredded field to the corresponding canonical shredded child.
208+ ///
209+ /// Returns `None` when the field is only a Parquet wrapper with no `typed_value`; that means the
210+ /// logical field is not represented in shredded storage and must be served from raw `value`.
198211fn logical_shredded_from_parquet_field (
199212 metadata : & ArrayRef ,
200213 field : ArrayRef ,
@@ -212,6 +225,8 @@ fn logical_shredded_from_parquet_field(
212225 return Ok ( None ) ;
213226 } ;
214227 let validity = field_struct. validity ( ) ?;
228+ // `unmasked_field_by_name_opt` intentionally ignores the parent struct validity.
229+ // Reapply it here so null wrapper rows become null typed/raw rows downstream.
215230 let typed_value = if validity. no_nulls ( ) {
216231 typed_value. clone ( )
217232 } else {
@@ -231,9 +246,13 @@ fn logical_shredded_from_parquet_field(
231246 . transpose ( ) ?;
232247
233248 let Some ( value) = value else {
249+ // Fully shredded field: recurse through the typed subtree and expose its
250+ // logical shape directly.
234251 return logical_shredded_from_parquet_typed_value ( metadata, typed_value) . map ( Some ) ;
235252 } ;
236253
254+ // Partially shredded terminal object: keep raw `value` available as the nested
255+ // Variant core storage while exposing any typed children as nested `shredded`.
237256 let validity = inferred_shredded_field_validity ( Some ( & value) , Some ( & typed_value) ) ?;
238257 let parquet_field =
239258 ParquetVariant :: try_new ( validity, metadata. clone ( ) , Some ( value) , Some ( typed_value) ) ?;
0 commit comments