Skip to content

Commit 4fc5e3d

Browse files
committed
Self-CR things
Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent 9a538ec commit 4fc5e3d

4 files changed

Lines changed: 43 additions & 36 deletions

File tree

encodings/parquet-variant/src/array.rs

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ use vortex_array::builtins::ArrayBuiltins;
2929
use vortex_array::dtype::DType;
3030
use vortex_array::dtype::FieldName;
3131
use vortex_array::dtype::FieldNames;
32+
use vortex_array::dtype::Nullability;
3233
use vortex_array::smallvec::smallvec;
3334
use vortex_array::validity::Validity;
3435
use vortex_array::vtable::child_to_validity;
@@ -161,12 +162,13 @@ pub(crate) fn core_storage_without_typed_value(
161162
pub(crate) fn logical_shredded_from_parquet_typed_value(
162163
metadata: &ArrayRef,
163164
typed_value: ArrayRef,
165+
ctx: &mut ExecutionCtx,
164166
) -> VortexResult<ArrayRef> {
165167
if let Some(list_array) = typed_value.as_opt::<List>() {
166168
// Lists keep their original offsets and validity; only the physical element
167169
// representation may need wrapper removal.
168170
let elements =
169-
logical_shredded_from_parquet_field(metadata, list_array.elements().clone())?
171+
logical_shredded_from_parquet_field(metadata, list_array.elements().clone(), ctx)?
170172
.unwrap_or_else(|| list_array.elements().clone());
171173
return Ok(ListArray::try_new(
172174
elements,
@@ -189,7 +191,9 @@ pub(crate) fn logical_shredded_from_parquet_typed_value(
189191
.iter()
190192
.zip(struct_array.iter_unmasked_fields())
191193
{
192-
if let Some(logical_field) = logical_shredded_from_parquet_field(metadata, field.clone())? {
194+
if let Some(logical_field) =
195+
logical_shredded_from_parquet_field(metadata, field.clone(), ctx)?
196+
{
193197
names.push(FieldName::from(name.as_ref()));
194198
fields.push(logical_field);
195199
}
@@ -211,6 +215,7 @@ pub(crate) fn logical_shredded_from_parquet_typed_value(
211215
fn logical_shredded_from_parquet_field(
212216
metadata: &ArrayRef,
213217
field: ArrayRef,
218+
ctx: &mut ExecutionCtx,
214219
) -> VortexResult<Option<ArrayRef>> {
215220
let Some(field_struct) = field.as_opt::<Struct>() else {
216221
return Ok(Some(field));
@@ -248,48 +253,49 @@ fn logical_shredded_from_parquet_field(
248253
let Some(value) = value else {
249254
// Fully shredded field: recurse through the typed subtree and expose its
250255
// logical shape directly.
251-
return logical_shredded_from_parquet_typed_value(metadata, typed_value).map(Some);
256+
return logical_shredded_from_parquet_typed_value(metadata, typed_value, ctx).map(Some);
252257
};
253258

254259
// Partially shredded terminal object: keep raw `value` available as the nested
255260
// Variant core storage while exposing any typed children as nested `shredded`.
256-
let validity = inferred_shredded_field_validity(Some(&value), Some(&typed_value))?;
261+
let validity = inferred_shredded_field_validity(Some(&value), Some(&typed_value), ctx)?;
257262
let parquet_field =
258263
ParquetVariant::try_new(validity, metadata.clone(), Some(value), Some(typed_value))?;
259264
let shredded = parquet_field
260265
.typed_value_array()
261266
.cloned()
262-
.map(|typed_value| logical_shredded_from_parquet_typed_value(metadata, typed_value))
267+
.map(|typed_value| {
268+
logical_shredded_from_parquet_typed_value(metadata, typed_value, ctx)
269+
})
263270
.transpose()?;
264271
return VariantArray::try_new(core_storage_without_typed_value(&parquet_field)?, shredded)
265272
.map(|array| Some(array.into_array()));
266273
}
267274

268-
logical_shredded_from_parquet_typed_value(metadata, field).map(Some)
275+
logical_shredded_from_parquet_typed_value(metadata, field, ctx).map(Some)
269276
}
270277

271278
fn inferred_shredded_field_validity(
272279
value: Option<&ArrayRef>,
273280
typed_value: Option<&ArrayRef>,
281+
ctx: &mut ExecutionCtx,
274282
) -> VortexResult<Validity> {
275283
let len = value
276284
.or(typed_value)
277285
.map(ArrayRef::len)
278286
.vortex_expect("at least one shredded field child");
279-
let validity = (0..len)
280-
.map(|idx| {
281-
let value_valid = value
282-
.map(|value| value.validity()?.is_valid(idx))
283-
.transpose()?
284-
.unwrap_or(false);
285-
let typed_valid = typed_value
286-
.map(|typed_value| typed_value.validity()?.is_valid(idx))
287-
.transpose()?
288-
.unwrap_or(false);
289-
Ok(value_valid || typed_valid)
290-
})
291-
.collect::<VortexResult<Vec<_>>>()?;
292-
Ok(Validity::from_iter(validity))
287+
let value_mask = value
288+
.map(|value| value.validity()?.execute_mask(len, ctx))
289+
.transpose()?;
290+
let typed_mask = typed_value
291+
.map(|typed_value| typed_value.validity()?.execute_mask(len, ctx))
292+
.transpose()?;
293+
let validity = match (value_mask, typed_mask) {
294+
(Some(value_mask), Some(typed_mask)) => &value_mask | &typed_mask,
295+
(Some(mask), None) | (None, Some(mask)) => mask,
296+
(None, None) => unreachable!("at least one shredded field child"),
297+
};
298+
Ok(Validity::from_mask(validity, Nullability::Nullable))
293299
}
294300

295301
/// Accessors and Arrow conversion for Parquet Variant storage arrays.

encodings/parquet-variant/src/vtable.rs

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -253,12 +253,12 @@ impl VTable for ParquetVariant {
253253
Ok(ArrayParts::new(self.clone(), dtype.clone(), len, EmptyArrayData).with_slots(slots))
254254
}
255255

256-
fn execute(array: Array<Self>, _ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
256+
fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
257257
let shredded = array
258258
.typed_value_array()
259259
.cloned()
260260
.map(|typed_value| {
261-
logical_shredded_from_parquet_typed_value(array.metadata_array(), typed_value)
261+
logical_shredded_from_parquet_typed_value(array.metadata_array(), typed_value, ctx)
262262
})
263263
.transpose()?;
264264
let core_storage = core_storage_without_typed_value(&array)?;
@@ -350,16 +350,6 @@ mod tests {
350350
.unwrap()
351351
}
352352

353-
fn file_session() -> VortexSession {
354-
let session = VortexSession::empty()
355-
.with::<ArraySession>()
356-
.with::<LayoutSession>()
357-
.with::<RuntimeSession>();
358-
vortex_file::register_default_encodings(&session);
359-
session.arrays().register(ParquetVariant);
360-
session
361-
}
362-
363353
#[test]
364354
fn test_execute_exposes_typed_value_as_canonical_shredded() -> VortexResult<()> {
365355
let metadata =
@@ -420,7 +410,13 @@ mod tests {
420410
let expected =
421411
ParquetVariant::from_arrow_variant(&ArrowVariantArray::try_new(&arrow_storage)?)?;
422412

423-
let session = file_session();
413+
let session = VortexSession::empty()
414+
.with::<ArraySession>()
415+
.with::<LayoutSession>()
416+
.with::<RuntimeSession>();
417+
vortex_file::register_default_encodings(&session);
418+
session.arrays().register(ParquetVariant);
419+
424420
let mut bytes = ByteBufferMut::empty();
425421
session
426422
.write_options()

vortex-array/src/expr/exprs.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,8 @@ pub fn get_item(field: impl Into<FieldName>, child: Expression) -> Expression {
120120

121121
/// Creates an expression that extracts a path from a Variant expression.
122122
///
123-
/// When `dtype` is `None`, the expression returns nullable Variant values. When `dtype` is
124-
/// provided, the expression returns that dtype with nullable nullability.
123+
/// Missing paths, traversal mismatches, and failed casts return null. When `dtype` is `None`,
124+
/// results are nullable Variant values; otherwise results are nullable values of `dtype`.
125125
pub fn variant_get(
126126
child: Expression,
127127
path: impl Into<VariantPath>,

vortex-array/src/scalar_fn/fns/variant_get/mod.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,12 @@ use crate::scalar_fn::ExecutionArgs;
3131
use crate::scalar_fn::ScalarFnId;
3232
use crate::scalar_fn::ScalarFnVTable;
3333

34-
/// Extracts a path from a Variant expression, optionally as a typed result.
34+
/// Extracts a field/index path from Variant values.
35+
///
36+
/// Missing paths, type mismatches while traversing, and failed casts produce nulls. Without a
37+
/// requested dtype, results are returned as nullable Variant values; with one, results are cast to
38+
/// that dtype with nullable nullability. Encodings may serve perfectly shredded paths directly,
39+
/// but must fall back to the core Variant value for paths not represented by shredded storage.
3540
#[derive(Clone)]
3641
pub struct VariantGet;
3742

0 commit comments

Comments
 (0)