Skip to content

Commit 71089dd

Browse files
feat: implement IsNotNull expression in vortex expression library (#6969)
### Summary

Closes: #6040

Add a first-class `IsNotNull` scalar function, replacing the previous `Not(IsNull(...))` composition pattern. This simplifies the expression tree and enables direct `stat_falsification` for zone map pruning.

Changes:

- New `is_not_null.rs` with a `ScalarFnVTable` implementation, including `stat_falsification` using `is_constant && null_count > 0` (with a TODO for a future `RowCount` stat)
- Updated all integration points (DataFusion, DuckDB, Python/Substrait) to use `is_not_null(...)` directly
- Replaced the `Not(IsNull(...))` fallback in the `erased.rs` validity with `IsNotNull`
- Registered `IsNotNull` in `ScalarFnSession` and `ExprBuiltins`/`ArrayBuiltins`

### AI Assistance Disclosure

This PR was developed with AI assistance (Kiro). AI was used for code review, implementing `stat_falsification`, writing tests, and drafting the PR description. All output was reviewed and validated by the author.

### API Changes

New public APIs:

- `vortex_array::expr::is_not_null(child)` — creates an `IsNotNull` expression
- `Expression::is_not_null()` / `ArrayRef::is_not_null()` via the `ExprBuiltins`/`ArrayBuiltins` traits
- Python: `vortex._lib.expr.is_not_null(child)`

### Testing

9 unit tests covering: return dtype, child replacement, mixed/all-valid/all-invalid evaluation, struct field access, display formatting, null sensitivity, and stat falsification pruning expression generation.

---------

Signed-off-by: Xiaoxuan Li <xioxuan@amazon.com>
Signed-off-by: Robert Kruszewski <github@robertk.io>
Co-authored-by: Robert Kruszewski <github@robertk.io>
1 parent 8dc8c96 commit 71089dd

18 files changed

Lines changed: 575 additions & 23 deletions

File tree

java/vortex-jni/src/main/java/dev/vortex/api/Expression.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,14 @@ interface Visitor<T> {
9595
*/
9696
T visitIsNull(IsNull isNull);
9797

98+
/**
99+
* Visits an is not null expression (non-null check).
100+
*
101+
* @param isNotNull the is not null expression to visit
102+
* @return the result of visiting the is not null expression
103+
*/
104+
T visitIsNotNull(IsNotNull isNotNull);
105+
98106
/**
99107
* For expressions that do not have a specific visitor method.
100108
*/
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
package dev.vortex.api.expressions;
5+
6+
import dev.vortex.api.Expression;
7+
import java.util.List;
8+
import java.util.Objects;
9+
import java.util.Optional;
10+
11+
/**
12+
* Represents an IS NOT NULL expression that checks whether values are non-null.
13+
* This expression returns true for non-null values and false for null values.
14+
*/
15+
public final class IsNotNull implements Expression {
16+
private final Expression child;
17+
18+
private IsNotNull(Expression child) {
19+
this.child = child;
20+
}
21+
22+
/**
23+
* Parses an IsNotNull expression from serialized metadata and child expressions.
24+
* This method is used during deserialization of Vortex expressions.
25+
*
26+
* @param metadata the serialized metadata, must be empty for IsNotNull expressions
27+
* @param children the child expressions, must contain exactly one element
28+
* @return a new IsNotNull expression parsed from the provided data
29+
* @throws IllegalArgumentException if the number of children is not exactly one,
30+
* or if metadata is not empty
31+
*/
32+
public static IsNotNull parse(byte[] metadata, List<Expression> children) {
33+
if (children.size() != 1) {
34+
throw new IllegalArgumentException(
35+
"IsNotNull expression must have exactly one child, found: " + children.size());
36+
}
37+
if (metadata.length > 0) {
38+
throw new IllegalArgumentException(
39+
"IsNotNull expression must not have metadata, found: " + metadata.length);
40+
}
41+
return new IsNotNull(children.get(0));
42+
}
43+
44+
/**
45+
* Creates a new IsNotNull expression that checks non-nullity of the given child expression.
46+
*
47+
* @param child the expression to check for non-null values
48+
* @return a new IsNotNull expression
49+
*/
50+
public static IsNotNull of(Expression child) {
51+
return new IsNotNull(child);
52+
}
53+
54+
@Override
55+
public boolean equals(Object o) {
56+
if (o == null || getClass() != o.getClass()) return false;
57+
IsNotNull other = (IsNotNull) o;
58+
return Objects.equals(child, other.child);
59+
}
60+
61+
@Override
62+
public int hashCode() {
63+
return Objects.hash(child);
64+
}
65+
66+
@Override
67+
public String id() {
68+
return "vortex.is_not_null";
69+
}
70+
71+
@Override
72+
public List<Expression> children() {
73+
return List.of(child);
74+
}
75+
76+
@Override
77+
public Optional<byte[]> metadata() {
78+
return Optional.of(new byte[] {});
79+
}
80+
81+
@Override
82+
public String toString() {
83+
return "vortex.is_not_null(" + child + ")";
84+
}
85+
86+
/**
87+
* Returns the child expression that will be checked for non-null values.
88+
*
89+
* @return the child expression
90+
*/
91+
public Expression getChild() {
92+
return child;
93+
}
94+
95+
@Override
96+
public <T> T accept(Visitor<T> visitor) {
97+
return visitor.visitIsNotNull(this);
98+
}
99+
}

java/vortex-jni/src/main/java/dev/vortex/api/proto/Expressions.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ public static ExprProtos.Expr serialize(Expression expression) {
3434
/**
3535
* Deserialize a protocol buffer representation back into an {@link Expression} object.
3636
* The method examines the expression ID and creates the appropriate concrete expression type
37-
* based on the registered expression types (binary, get_item, root, literal, not).
37+
* based on the registered expression types (binary, get_item, root, literal, not, is_null,
38+
* is_not_null).
3839
* If the expression ID is not recognized, an {@link Unknown} expression is created.
3940
*
4041
* @param expr the protocol buffer expression to deserialize
@@ -58,6 +59,8 @@ public static Expression deserialize(ExprProtos.Expr expr) {
5859
return Not.parse(metadata, children);
5960
case "vortex.is_null":
6061
return IsNull.parse(metadata, children);
62+
case "vortex.is_not_null":
63+
return IsNotNull.parse(metadata, children);
6164
default:
6265
return new Unknown(expr.getId(), children, expr.getMetadata().toByteArray());
6366
}

java/vortex-jni/src/test/java/dev/vortex/api/expressions/proto/TestExpressionProtos.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,12 @@ public void testIsNullRoundTrip() {
3030
Expression deserialized = Expressions.deserialize(proto);
3131
assertEquals(expression, deserialized);
3232
}
33+
34+
@Test
35+
public void testIsNotNullRoundTrip() {
36+
Expression expression = IsNotNull.of(GetItem.of(Root.INSTANCE, "a.b.c"));
37+
ExprProtos.Expr proto = Expressions.serialize(expression);
38+
Expression deserialized = Expressions.deserialize(proto);
39+
assertEquals(expression, deserialized);
40+
}
3341
}

vortex-array/public-api.lock

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8440,6 +8440,8 @@ pub fn vortex_array::builtins::ArrayBuiltins::fill_null(&self, fill_value: impl
84408440

84418441
pub fn vortex_array::builtins::ArrayBuiltins::get_item(&self, field_name: impl core::convert::Into<vortex_array::dtype::FieldName>) -> vortex_error::VortexResult<vortex_array::ArrayRef>
84428442

8443+
pub fn vortex_array::builtins::ArrayBuiltins::is_not_null(&self) -> vortex_error::VortexResult<vortex_array::ArrayRef>
8444+
84438445
pub fn vortex_array::builtins::ArrayBuiltins::is_null(&self) -> vortex_error::VortexResult<vortex_array::ArrayRef>
84448446

84458447
pub fn vortex_array::builtins::ArrayBuiltins::list_contains(&self, value: vortex_array::ArrayRef) -> vortex_error::VortexResult<vortex_array::ArrayRef>
@@ -8462,6 +8464,8 @@ pub fn vortex_array::ArrayRef::fill_null(&self, fill_value: impl core::convert::
84628464

84638465
pub fn vortex_array::ArrayRef::get_item(&self, field_name: impl core::convert::Into<vortex_array::dtype::FieldName>) -> vortex_error::VortexResult<vortex_array::ArrayRef>
84648466

8467+
pub fn vortex_array::ArrayRef::is_not_null(&self) -> vortex_error::VortexResult<vortex_array::ArrayRef>
8468+
84658469
pub fn vortex_array::ArrayRef::is_null(&self) -> vortex_error::VortexResult<vortex_array::ArrayRef>
84668470

84678471
pub fn vortex_array::ArrayRef::list_contains(&self, value: vortex_array::ArrayRef) -> vortex_error::VortexResult<vortex_array::ArrayRef>
@@ -8482,6 +8486,8 @@ pub fn vortex_array::builtins::ExprBuiltins::fill_null(&self, fill_value: vortex
84828486

84838487
pub fn vortex_array::builtins::ExprBuiltins::get_item(&self, field_name: impl core::convert::Into<vortex_array::dtype::FieldName>) -> vortex_error::VortexResult<vortex_array::expr::Expression>
84848488

8489+
pub fn vortex_array::builtins::ExprBuiltins::is_not_null(&self) -> vortex_error::VortexResult<vortex_array::expr::Expression>
8490+
84858491
pub fn vortex_array::builtins::ExprBuiltins::is_null(&self) -> vortex_error::VortexResult<vortex_array::expr::Expression>
84868492

84878493
pub fn vortex_array::builtins::ExprBuiltins::list_contains(&self, value: vortex_array::expr::Expression) -> vortex_error::VortexResult<vortex_array::expr::Expression>
@@ -8502,6 +8508,8 @@ pub fn vortex_array::expr::Expression::fill_null(&self, fill_value: vortex_array
85028508

85038509
pub fn vortex_array::expr::Expression::get_item(&self, field_name: impl core::convert::Into<vortex_array::dtype::FieldName>) -> vortex_error::VortexResult<vortex_array::expr::Expression>
85048510

8511+
pub fn vortex_array::expr::Expression::is_not_null(&self) -> vortex_error::VortexResult<vortex_array::expr::Expression>
8512+
85058513
pub fn vortex_array::expr::Expression::is_null(&self) -> vortex_error::VortexResult<vortex_array::expr::Expression>
85068514

85078515
pub fn vortex_array::expr::Expression::list_contains(&self, value: vortex_array::expr::Expression) -> vortex_error::VortexResult<vortex_array::expr::Expression>
@@ -12426,6 +12434,8 @@ pub fn vortex_array::expr::Expression::fill_null(&self, fill_value: vortex_array
1242612434

1242712435
pub fn vortex_array::expr::Expression::get_item(&self, field_name: impl core::convert::Into<vortex_array::dtype::FieldName>) -> vortex_error::VortexResult<vortex_array::expr::Expression>
1242812436

12437+
pub fn vortex_array::expr::Expression::is_not_null(&self) -> vortex_error::VortexResult<vortex_array::expr::Expression>
12438+
1242912439
pub fn vortex_array::expr::Expression::is_null(&self) -> vortex_error::VortexResult<vortex_array::expr::Expression>
1243012440

1243112441
pub fn vortex_array::expr::Expression::list_contains(&self, value: vortex_array::expr::Expression) -> vortex_error::VortexResult<vortex_array::expr::Expression>
@@ -12520,6 +12530,8 @@ pub fn vortex_array::expr::immediate_scope_access<'a>(expr: &'a vortex_array::ex
1252012530

1252112531
pub fn vortex_array::expr::immediate_scope_accesses<'a>(expr: &'a vortex_array::expr::Expression, scope: &'a vortex_array::dtype::StructFields) -> vortex_array::expr::FieldAccesses<'a>
1252212532

12533+
pub fn vortex_array::expr::is_not_null(child: vortex_array::expr::Expression) -> vortex_array::expr::Expression
12534+
1252312535
pub fn vortex_array::expr::is_null(child: vortex_array::expr::Expression) -> vortex_array::expr::Expression
1252412536

1252512537
pub fn vortex_array::expr::is_root(expr: &vortex_array::expr::Expression) -> bool
@@ -16302,6 +16314,52 @@ pub fn vortex_array::scalar_fn::fns::get_item::GetItem::stat_falsification(&self
1630216314

1630316315
pub fn vortex_array::scalar_fn::fns::get_item::GetItem::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult<core::option::Option<vortex_array::expr::Expression>>
1630416316

16317+
pub mod vortex_array::scalar_fn::fns::is_not_null
16318+
16319+
pub struct vortex_array::scalar_fn::fns::is_not_null::IsNotNull
16320+
16321+
impl core::clone::Clone for vortex_array::scalar_fn::fns::is_not_null::IsNotNull
16322+
16323+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::clone(&self) -> vortex_array::scalar_fn::fns::is_not_null::IsNotNull
16324+
16325+
impl vortex_array::scalar_fn::ScalarFnVTable for vortex_array::scalar_fn::fns::is_not_null::IsNotNull
16326+
16327+
pub type vortex_array::scalar_fn::fns::is_not_null::IsNotNull::Options = vortex_array::scalar_fn::EmptyOptions
16328+
16329+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::arity(&self, _options: &Self::Options) -> vortex_array::scalar_fn::Arity
16330+
16331+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::child_name(&self, _instance: &Self::Options, child_idx: usize) -> vortex_array::scalar_fn::ChildName
16332+
16333+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::coerce_args(&self, options: &Self::Options, args: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult<alloc::vec::Vec<vortex_array::dtype::DType>>
16334+
16335+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::deserialize(&self, _metadata: &[u8], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
16336+
16337+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::execute(&self, _data: &Self::Options, args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::ArrayRef>
16338+
16339+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::fmt_sql(&self, _options: &Self::Options, expr: &vortex_array::expr::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
16340+
16341+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::id(&self) -> vortex_array::scalar_fn::ScalarFnId
16342+
16343+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::is_fallible(&self, _instance: &Self::Options) -> bool
16344+
16345+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::is_null_sensitive(&self, _instance: &Self::Options) -> bool
16346+
16347+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::reduce(&self, options: &Self::Options, node: &dyn vortex_array::scalar_fn::ReduceNode, ctx: &dyn vortex_array::scalar_fn::ReduceCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::scalar_fn::ReduceNodeRef>>
16348+
16349+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::return_dtype(&self, _options: &Self::Options, _arg_dtypes: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult<vortex_array::dtype::DType>
16350+
16351+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::serialize(&self, _instance: &Self::Options) -> vortex_error::VortexResult<core::option::Option<alloc::vec::Vec<u8>>>
16352+
16353+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::simplify(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, ctx: &dyn vortex_array::scalar_fn::SimplifyCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::expr::Expression>>
16354+
16355+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::simplify_untyped(&self, options: &Self::Options, expr: &vortex_array::expr::Expression) -> vortex_error::VortexResult<core::option::Option<vortex_array::expr::Expression>>
16356+
16357+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::stat_expression(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, stat: vortex_array::expr::stats::Stat, catalog: &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option<vortex_array::expr::Expression>
16358+
16359+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::stat_falsification(&self, _options: &Self::Options, expr: &vortex_array::expr::Expression, catalog: &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option<vortex_array::expr::Expression>
16360+
16361+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult<core::option::Option<vortex_array::expr::Expression>>
16362+
1630516363
pub mod vortex_array::scalar_fn::fns::is_null
1630616364

1630716365
pub struct vortex_array::scalar_fn::fns::is_null::IsNull
@@ -18084,6 +18142,44 @@ pub fn vortex_array::scalar_fn::fns::get_item::GetItem::stat_falsification(&self
1808418142

1808518143
pub fn vortex_array::scalar_fn::fns::get_item::GetItem::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult<core::option::Option<vortex_array::expr::Expression>>
1808618144

18145+
impl vortex_array::scalar_fn::ScalarFnVTable for vortex_array::scalar_fn::fns::is_not_null::IsNotNull
18146+
18147+
pub type vortex_array::scalar_fn::fns::is_not_null::IsNotNull::Options = vortex_array::scalar_fn::EmptyOptions
18148+
18149+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::arity(&self, _options: &Self::Options) -> vortex_array::scalar_fn::Arity
18150+
18151+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::child_name(&self, _instance: &Self::Options, child_idx: usize) -> vortex_array::scalar_fn::ChildName
18152+
18153+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::coerce_args(&self, options: &Self::Options, args: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult<alloc::vec::Vec<vortex_array::dtype::DType>>
18154+
18155+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::deserialize(&self, _metadata: &[u8], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
18156+
18157+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::execute(&self, _data: &Self::Options, args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::ArrayRef>
18158+
18159+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::fmt_sql(&self, _options: &Self::Options, expr: &vortex_array::expr::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
18160+
18161+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::id(&self) -> vortex_array::scalar_fn::ScalarFnId
18162+
18163+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::is_fallible(&self, _instance: &Self::Options) -> bool
18164+
18165+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::is_null_sensitive(&self, _instance: &Self::Options) -> bool
18166+
18167+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::reduce(&self, options: &Self::Options, node: &dyn vortex_array::scalar_fn::ReduceNode, ctx: &dyn vortex_array::scalar_fn::ReduceCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::scalar_fn::ReduceNodeRef>>
18168+
18169+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::return_dtype(&self, _options: &Self::Options, _arg_dtypes: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult<vortex_array::dtype::DType>
18170+
18171+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::serialize(&self, _instance: &Self::Options) -> vortex_error::VortexResult<core::option::Option<alloc::vec::Vec<u8>>>
18172+
18173+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::simplify(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, ctx: &dyn vortex_array::scalar_fn::SimplifyCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::expr::Expression>>
18174+
18175+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::simplify_untyped(&self, options: &Self::Options, expr: &vortex_array::expr::Expression) -> vortex_error::VortexResult<core::option::Option<vortex_array::expr::Expression>>
18176+
18177+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::stat_expression(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, stat: vortex_array::expr::stats::Stat, catalog: &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option<vortex_array::expr::Expression>
18178+
18179+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::stat_falsification(&self, _options: &Self::Options, expr: &vortex_array::expr::Expression, catalog: &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option<vortex_array::expr::Expression>
18180+
18181+
pub fn vortex_array::scalar_fn::fns::is_not_null::IsNotNull::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult<core::option::Option<vortex_array::expr::Expression>>
18182+
1808718183
impl vortex_array::scalar_fn::ScalarFnVTable for vortex_array::scalar_fn::fns::is_null::IsNull
1808818184

1808918185
pub type vortex_array::scalar_fn::fns::is_null::IsNull::Options = vortex_array::scalar_fn::EmptyOptions
@@ -22410,6 +22506,8 @@ pub fn vortex_array::ArrayRef::fill_null(&self, fill_value: impl core::convert::
2241022506

2241122507
pub fn vortex_array::ArrayRef::get_item(&self, field_name: impl core::convert::Into<vortex_array::dtype::FieldName>) -> vortex_error::VortexResult<vortex_array::ArrayRef>
2241222508

22509+
pub fn vortex_array::ArrayRef::is_not_null(&self) -> vortex_error::VortexResult<vortex_array::ArrayRef>
22510+
2241322511
pub fn vortex_array::ArrayRef::is_null(&self) -> vortex_error::VortexResult<vortex_array::ArrayRef>
2241422512

2241522513
pub fn vortex_array::ArrayRef::list_contains(&self, value: vortex_array::ArrayRef) -> vortex_error::VortexResult<vortex_array::ArrayRef>

0 commit comments

Comments
 (0)