Skip to content

Commit bc6ab5f

Browse files
committed
Expose expressions for C API
Signed-off-by: Mikhail Kot <mikhail@spiraldb.com>
1 parent 66236f8 commit bc6ab5f

7 files changed

Lines changed: 827 additions & 4 deletions

File tree

vortex-array/public-api.lock

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11414,6 +11414,10 @@ impl core::convert::From<alloc::vec::Vec<&str>> for vortex_array::dtype::FieldNa
1141411414

1141511415
pub fn vortex_array::dtype::FieldNames::from(value: alloc::vec::Vec<&str>) -> Self
1141611416

11417+
impl core::convert::From<alloc::vec::Vec<alloc::string::String>> for vortex_array::dtype::FieldNames
11418+
11419+
pub fn vortex_array::dtype::FieldNames::from(value: alloc::vec::Vec<alloc::string::String>) -> Self
11420+
1141711421
impl core::convert::From<alloc::vec::Vec<alloc::sync::Arc<str>>> for vortex_array::dtype::FieldNames
1141811422

1141911423
pub fn vortex_array::dtype::FieldNames::from(value: alloc::vec::Vec<alloc::sync::Arc<str>>) -> Self

vortex-array/src/dtype/field_names.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,12 @@ impl From<Vec<FieldName>> for FieldNames {
317317
}
318318
}
319319

320+
impl From<Vec<String>> for FieldNames {
321+
fn from(value: Vec<String>) -> Self {
322+
value.into_iter().collect()
323+
}
324+
}
325+
320326
impl From<Vec<Arc<str>>> for FieldNames {
321327
fn from(value: Vec<Arc<str>>) -> Self {
322328
value.into_iter().collect()

vortex-ffi/cinclude/vortex.h

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,66 @@ typedef enum {
114114
DTYPE_FIXED_SIZE_LIST = 9,
115115
} vx_dtype_variant;
116116

117+
/**
118+
* Equalities, inequalities, and boolean operations over possibly null values.
119+
* For most operations, if either side is null, the result is null.
120+
* VX_OPERATOR_KLEENE_AND, VX_OPERATOR_KLEENE_OR obey Kleene (three-valued)
121+
* logic
122+
*/
123+
typedef enum {
124+
/**
125+
* Expressions are equal.
126+
*/
127+
VX_OPERATOR_EQ = 0,
128+
/**
129+
* Expressions are not equal.
130+
*/
131+
VX_OPERATOR_NOT_EQ = 1,
132+
/**
133+
* Expression is greater than another
134+
*/
135+
VX_OPERATOR_GT = 2,
136+
/**
137+
* Expression is greater than or equal to another
138+
*/
139+
VX_OPERATOR_GTE = 3,
140+
/**
141+
* Expression is less than another
142+
*/
143+
VX_OPERATOR_LT = 4,
144+
/**
145+
* Expression is less than or equal to another
146+
*/
147+
VX_OPERATOR_LTE = 5,
148+
/**
149+
* Boolean AND /\.
150+
*/
151+
VX_OPERATOR_KLEENE_AND = 6,
152+
/**
153+
* Boolean OR \/.
154+
*/
155+
VX_OPERATOR_KLEENE_OR = 7,
156+
/**
157+
* The sum of the arguments.
158+
* Errors at runtime if the sum would overflow or underflow.
159+
*/
160+
VX_OPERATOR_ADD = 8,
161+
/**
162+
* The difference between the arguments.
163+
* Errors at runtime if the difference would overflow or underflow.
164+
* The result is null at any index where either input is null.
165+
*/
166+
VX_OPERATOR_SUB = 9,
167+
/**
168+
* Multiply two numbers
169+
*/
170+
VX_OPERATOR_MUL = 10,
171+
/**
172+
* Divide the left side by the right side
173+
*/
174+
VX_OPERATOR_DIV = 11,
175+
} vx_operator;
176+
117177
/**
118178
* Log levels for the Vortex library.
119179
*/
@@ -297,6 +357,22 @@ typedef struct vx_dtype vx_dtype;
297357
*/
298358
typedef struct vx_error vx_error;
299359

360+
/**
361+
* A node in a Vortex expression tree.
362+
*
363+
* Expressions represent scalar computations that can be performed on
364+
* data. Each expression consists of an encoding (vtable), heap-allocated
365+
* metadata, and child expressions.
366+
*
367+
* Unless stated explicitly, all expressions returned are owned and must
368+
* be freed by the caller.
369+
* Unless stated explicitly, if an operation on const vx_expression* is
370+
* passed NULL, NULL is returned.
371+
* Operations on expressions don't take ownership of input values, and so
372+
* input values must be freed by the caller.
373+
*/
374+
typedef struct vx_expression vx_expression;
375+
300376
/**
301377
* A handle to a Vortex file encapsulating the footer and logic for instantiating a reader.
302378
*/
@@ -478,6 +554,11 @@ const vx_string *vx_array_get_utf8(const vx_array *array, uint32_t index);
478554
*/
479555
const vx_binary *vx_array_get_binary(const vx_array *array, uint32_t index);
480556

557+
/**
558+
* Apply the expression to the array, producing a new array in constant time
559+
*/
560+
const vx_array *vx_array_apply(const vx_array *array, const vx_expression *expression, vx_error **error);
561+
481562
/**
482563
* Free an owned [`vx_array_iterator`] object.
483564
*/
@@ -677,6 +758,117 @@ void vx_error_free(vx_error *ptr);
677758
*/
678759
const vx_string *vx_error_get_message(const vx_error *error);
679760

761+
/**
762+
* Clone a borrowed [`vx_expression`], returning an owned [`vx_expression`].
763+
*
764+
*
765+
* Must be released with [`vx_expression_free`].
766+
*/
767+
const vx_expression *vx_expression_clone(const vx_expression *ptr);
768+
769+
/**
770+
* Free an owned [`vx_expression`] object.
771+
*/
772+
void vx_expression_free(const vx_expression *ptr);
773+
774+
/**
775+
* Create a root expression
776+
*/
777+
const vx_expression *vx_expression_root(void);
778+
779+
/**
780+
* Create an expression that accesses a field from the root array.
781+
*
782+
* Equivalent to get_item(name, root()).
783+
*/
784+
const vx_expression *vx_expression_column(const char *name);
785+
786+
/**
787+
* Create an expression that selects (includes) specific fields from a child
788+
* expression. Child expression must have a DTYPE_STRUCT dtype.
789+
*
790+
* Example:
791+
*
792+
* const vx_expression* root = vx_expression_root();
793+
* const char* names[] = {"name", "age"};
794+
* const vx_expression* select = vx_expression_select(names, 2, root);
795+
* vx_expression_free(select);
796+
* vx_expression_free(root);
797+
*
798+
*/
799+
const vx_expression *vx_expression_select(const char *const *names, size_t len, const vx_expression *child);
800+
801+
/**
802+
* Create an AND expression for multiple child expressions.
803+
* If there are no input expressions, returns NULL.
804+
*/
805+
const vx_expression *vx_expression_and(const vx_expression *const *expressions, size_t len);
806+
807+
/**
808+
* Create an OR disjunction expression for multiple child expressions.
809+
* If there are no input expressions, returns NULL.
810+
*/
811+
const vx_expression *vx_expression_or(const vx_expression *const *expressions, size_t len);
812+
813+
/**
814+
* Create a binary expression for two expressions of form lhs OP rhs.
815+
* If either input is NULL, returns NULL.
816+
*
817+
* Example:
818+
*
819+
* const vx_expression* age = vx_expression_column("age");
820+
* const vx_expression* height = vx_expression_column("height");
821+
* const vx_expression* sum = vx_expression_binary(VX_OPERATOR_ADD, age, height);
822+
* vx_expression_free(sum);
823+
* vx_expression_free(height);
824+
* vx_expression_free(age);
825+
*
826+
*/
827+
const vx_expression *
828+
vx_expression_binary(vx_operator operator_, const vx_expression *lhs, const vx_expression *rhs);
829+
830+
const vx_expression *vx_expression_eq(const vx_expression *lhs, const vx_expression *rhs);
831+
832+
const vx_expression *vx_expression_not_eq(const vx_expression *lhs, const vx_expression *rhs);
833+
834+
const vx_expression *vx_expression_lt(const vx_expression *lhs, const vx_expression *rhs);
835+
836+
const vx_expression *vx_expression_lt_eq(const vx_expression *lhs, const vx_expression *rhs);
837+
838+
const vx_expression *vx_expression_gt(const vx_expression *lhs, const vx_expression *rhs);
839+
840+
const vx_expression *vx_expression_gt_eq(const vx_expression *lhs, const vx_expression *rhs);
841+
842+
/**
843+
* Create a logical NOT of the child expression.
844+
*
845+
* Returns the logical negation of the input boolean expression.
846+
*/
847+
const vx_expression *vx_expression_not(const vx_expression *child);
848+
849+
/**
850+
* Create an expression that checks for null values.
851+
*
852+
* Returns a boolean array indicating which positions contain null values.
853+
*/
854+
const vx_expression *vx_expression_is_null(const vx_expression *child);
855+
856+
/**
857+
* Create an expression that extracts a named field from a struct expression.
858+
* Child expression must have a DTYPE_STRUCT dtype.
859+
*
860+
* Accesses the specified field from the result of the child expression.
861+
* Equivalent to select(&item, 1, child).
862+
*/
863+
const vx_expression *vx_expression_get_item(const char *item, const vx_expression *child);
864+
865+
/**
866+
* Create an expression that checks if a value is contained in a list.
867+
*
868+
* Returns a boolean array indicating whether the value appears in each list.
869+
*/
870+
const vx_expression *vx_expression_list_contains(const vx_expression *list, const vx_expression *value);
871+
680872
/**
681873
* Clone a borrowed [`vx_file`], returning an owned [`vx_file`].
682874
*

vortex-ffi/src/array.rs

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,15 @@ use vortex::array::DynArray;
99
use vortex::array::ToCanonical;
1010
use vortex::dtype::half::f16;
1111
use vortex::error::VortexExpect;
12+
use vortex::error::vortex_ensure;
1213
use vortex::error::vortex_err;
1314

1415
use crate::arc_dyn_wrapper;
1516
use crate::binary::vx_binary;
1617
use crate::dtype::vx_dtype;
1718
use crate::error::try_or_default;
1819
use crate::error::vx_error;
20+
use crate::expression::vx_expression;
1921
use crate::string::vx_string;
2022

2123
arc_dyn_wrapper!(
@@ -186,11 +188,28 @@ pub unsafe extern "C-unwind" fn vx_array_get_binary(
186188
}
187189
}
188190

191+
/// Apply the expression to the array, producing a new array in constant time
192+
#[unsafe(no_mangle)]
193+
pub unsafe extern "C" fn vx_array_apply(
194+
array: *const vx_array,
195+
expression: *const vx_expression,
196+
error: *mut *mut vx_error,
197+
) -> *const vx_array {
198+
try_or_default(error, || {
199+
vortex_ensure!(!array.is_null());
200+
vortex_ensure!(!expression.is_null());
201+
let array = vx_array::as_ref(array);
202+
let expression = vx_expression::as_ref(expression);
203+
Ok(vx_array::new(Arc::new(array.apply(expression)?)))
204+
})
205+
}
206+
189207
#[cfg(test)]
190208
mod tests {
191209
use std::ptr;
192210

193211
use vortex::array::IntoArray;
212+
use vortex::array::arrays::BoolArray;
194213
use vortex::array::arrays::PrimitiveArray;
195214
use vortex::array::arrays::StructArray;
196215
use vortex::array::arrays::VarBinViewArray;
@@ -199,12 +218,16 @@ mod tests {
199218
use vortex::buffer::buffer;
200219
#[cfg(not(miri))]
201220
use vortex::dtype::half::f16;
221+
use vortex::expr::eq;
222+
use vortex::expr::lit;
223+
use vortex::expr::root;
202224

203225
use crate::array::*;
204226
use crate::binary::vx_binary_free;
205227
use crate::dtype::vx_dtype_get_variant;
206228
use crate::dtype::vx_dtype_variant;
207229
use crate::error::vx_error_free;
230+
use crate::expression::vx_expression_free;
208231
use crate::string::vx_string_free;
209232

210233
#[test]
@@ -424,6 +447,55 @@ mod tests {
424447
}
425448
}
426449

450+
#[test]
451+
#[cfg_attr(miri, ignore)]
452+
fn test_apply() {
453+
let primitive = PrimitiveArray::new(
454+
buffer![1i32, 2i32, 3i32, 3i32],
455+
Validity::from_iter([true, false, true, true]),
456+
);
457+
458+
unsafe {
459+
let mut error = ptr::null_mut();
460+
461+
let res = vx_array_apply(ptr::null(), ptr::null(), &raw mut error);
462+
assert!(res.is_null());
463+
assert!(!error.is_null());
464+
vx_error_free(error);
465+
466+
let array = vx_array::new(primitive.into_array());
467+
468+
let res = vx_array_apply(array, ptr::null(), &raw mut error);
469+
assert!(res.is_null());
470+
assert!(!error.is_null());
471+
vx_error_free(error);
472+
473+
// Test with Vortex Rust-side expressions here, test C API for
474+
// expressions in src/expression.rs
475+
let expression = eq(root(), lit(3i32));
476+
let expression = vx_expression::new(Arc::new(expression));
477+
478+
let res = vx_array_apply(ptr::null(), expression, &raw mut error);
479+
assert!(res.is_null());
480+
assert!(!error.is_null());
481+
vx_error_free(error);
482+
483+
let res = vx_array_apply(array, expression, &raw mut error);
484+
assert!(!res.is_null());
485+
assert!(error.is_null());
486+
{
487+
let res = vx_array::as_ref(res);
488+
let buffer = res.to_bool().to_bit_buffer();
489+
let expected = BoolArray::from_iter(vec![false, false, true, true]);
490+
assert_eq!(buffer, expected.to_bit_buffer());
491+
}
492+
vx_array_free(res);
493+
494+
vx_expression_free(expression);
495+
vx_array_free(array);
496+
}
497+
}
498+
427499
#[test]
428500
fn test_array_dtype_lifetime_pattern() {
429501
let array = {

0 commit comments

Comments
 (0)