Skip to content

Commit 6d3804d

Browse files
committed
docs
Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent 629d132 commit 6d3804d

1 file changed

Lines changed: 19 additions & 0 deletions

File tree

encodings/parquet-variant/src/array.rs

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -153,11 +153,18 @@ pub(crate) fn core_storage_without_typed_value(
153153
.map(IntoArray::into_array)
154154
}
155155

156+
/// Converts a Parquet `typed_value` tree into the storage-agnostic canonical shredded tree.
157+
///
158+
/// Parquet shredding represents nested fields with wrapper structs containing `value` and/or
159+
/// `typed_value`. This strips those wrappers, preserves list/struct shape, and leaves primitive
160+
/// typed values unchanged.
156161
pub(crate) fn logical_shredded_from_parquet_typed_value(
157162
metadata: &ArrayRef,
158163
typed_value: ArrayRef,
159164
) -> VortexResult<ArrayRef> {
160165
if let Some(list_array) = typed_value.as_opt::<List>() {
166+
// Lists keep their original offsets and validity; only the physical element
167+
// representation may need wrapper removal.
161168
let elements =
162169
logical_shredded_from_parquet_field(metadata, list_array.elements().clone())?
163170
.unwrap_or_else(|| list_array.elements().clone());
@@ -173,6 +180,8 @@ pub(crate) fn logical_shredded_from_parquet_typed_value(
173180
return Ok(typed_value);
174181
};
175182

183+
// For object shredding, each struct field is a logical object field. Fields that
184+
// are known wrapper shells without typed data are omitted from the canonical tree.
176185
let mut names = Vec::new();
177186
let mut fields = Vec::new();
178187
for (name, field) in struct_array
@@ -195,6 +204,10 @@ pub(crate) fn logical_shredded_from_parquet_typed_value(
195204
.into_array())
196205
}
197206

207+
/// Converts one Parquet shredded field to the corresponding canonical shredded child.
208+
///
209+
/// Returns `None` when the field is only a Parquet wrapper with no `typed_value`; that means the
210+
/// logical field is not represented in shredded storage and must be served from raw `value`.
198211
fn logical_shredded_from_parquet_field(
199212
metadata: &ArrayRef,
200213
field: ArrayRef,
@@ -212,6 +225,8 @@ fn logical_shredded_from_parquet_field(
212225
return Ok(None);
213226
};
214227
let validity = field_struct.validity()?;
228+
// `unmasked_field_by_name_opt` intentionally ignores the parent struct validity.
229+
// Reapply it here so null wrapper rows become null typed/raw rows downstream.
215230
let typed_value = if validity.no_nulls() {
216231
typed_value.clone()
217232
} else {
@@ -231,9 +246,13 @@ fn logical_shredded_from_parquet_field(
231246
.transpose()?;
232247

233248
let Some(value) = value else {
249+
// Fully shredded field: recurse through the typed subtree and expose its
250+
// logical shape directly.
234251
return logical_shredded_from_parquet_typed_value(metadata, typed_value).map(Some);
235252
};
236253

254+
// Partially shredded terminal object: keep raw `value` available as the nested
255+
// Variant core storage while exposing any typed children as nested `shredded`.
237256
let validity = inferred_shredded_field_validity(Some(&value), Some(&typed_value))?;
238257
let parquet_field =
239258
ParquetVariant::try_new(validity, metadata.clone(), Some(value), Some(typed_value))?;

0 commit comments

Comments
 (0)