Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions .github/workflows/rust-instrumented.yml
Original file line number Diff line number Diff line change
Expand Up @@ -187,15 +187,28 @@ jobs:
cargo +$NIGHTLY_TOOLCHAIN build --locked --no-default-features \
--target x86_64-unknown-linux-gnu -Zbuild-std \
-p vortex-ffi
- name: Build FFI library tests
- name: Build FFI library tests and examples
run: |
cd vortex-ffi
cmake -Bbuild -DBUILD_TESTS=1 -DSANITIZER=${{ matrix.sanitizer }} -DTARGET_TRIPLE="x86_64-unknown-linux-gnu"
cmake -Bbuild -DBUILD_TESTS=1 -DBUILD_EXAMPLES=1 -DSANITIZER=${{ matrix.sanitizer }} -DTARGET_TRIPLE="x86_64-unknown-linux-gnu"
cmake --build build -j
- name: Run tests
run: |
set -o pipefail
./vortex-ffi/build/test/vortex_ffi_test 2>&1 | rustfilt -i-
./vortex-ffi/build/test/vortex_ffi_test 2>&1 | rustfilt
- name: Run examples
run: |
set -o pipefail

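# Workaround: remove nanoarrow's fetched Python sources before running the
# glob-based examples below, which otherwise fail with:
#   Failed to create data source: Object store error: Generic LocalFileSystem
#   error: Unable to walk dir: File system loop found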
rm -fr vortex-ffi/build/_deps/nanoarrow-src/python

./vortex-ffi/build/examples/write_sample file.vortex 2>&1 | rustfilt
./vortex-ffi/build/examples/write_sample file2.vortex 2>&1 | rustfilt
Comment thread
myrrc marked this conversation as resolved.
./vortex-ffi/build/examples/dtype '*.vortex' 2>&1 | rustfilt
./vortex-ffi/build/examples/scan '*.vortex' 2>&1 | rustfilt
./vortex-ffi/build/examples/scan_to_arrow '*.vortex' 2>&1 | rustfilt

miri:
name: "Rust tests (miri)"
Expand Down
20 changes: 18 additions & 2 deletions vortex-ffi/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# SPDX-FileCopyrightText: Copyright the Vortex contributors
cmake_minimum_required(VERSION 3.10)

include(FetchContent)

project(VortexFFI
VERSION 0.0.1
LANGUAGES C)
Expand All @@ -10,6 +12,7 @@ set(CMAKE_C_STANDARD 17)
# Strict warnings for C sources; -Werror promotes them to build errors.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Werror -Wextra -Wpedantic")

# Optional components, both disabled by default.
option(BUILD_TESTS "Build tests" OFF)
option(BUILD_EXAMPLES "Build examples" OFF)

# String cache variables, empty by default; set them with -D<name>=<value>.
set(SANITIZER "" CACHE STRING "Build with sanitizers")
set(TARGET_TRIPLE "" CACHE STRING "Rust target triple for FFI library")
Expand Down Expand Up @@ -76,10 +79,10 @@ Static library path ${LIBRARY_PATH}
Headers path ${LIBRARY_HEADERS}")

if (NOT EXISTS "${LIBRARY_PATH_SHARED}")
message(FATAL_ERROR "Shared library not found")
message(FATAL_ERROR "Shared library not found, run `cargo build --release -p vortex-ffi`")
endif()
if (NOT EXISTS "${LIBRARY_PATH}")
message(FATAL_ERROR "Static library not found")
message(FATAL_ERROR "Static library not found, run `cargo build --release -p vortex-ffi`")
endif()

add_library(vortex_ffi STATIC IMPORTED)
Expand All @@ -95,6 +98,15 @@ set_target_properties(vortex_ffi_shared PROPERTIES
INTERFACE_LINK_OPTIONS "LINKER:-rpath,${LIBRARY_DIR}"
)

# Nanoarrow is downloaded and added to the build at configure time, but only
# when tests or examples are requested.
if (BUILD_TESTS OR BUILD_EXAMPLES)
    # FetchContent_MakeAvailable() was introduced in CMake 3.14, which is newer
    # than this project's declared minimum. Fail with a clear message instead
    # of an "unknown command" error.
    if (CMAKE_VERSION VERSION_LESS 3.14)
        message(FATAL_ERROR
            "BUILD_TESTS and BUILD_EXAMPLES require CMake 3.14 or newer "
            "(found ${CMAKE_VERSION})")
    endif()

    # include() is idempotent, so this is safe even if the module is already
    # loaded elsewhere in the file.
    include(FetchContent)

    # A release tag is pinned, so a shallow clone is sufficient.
    FetchContent_Declare(
        Nanoarrow
        GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow
        GIT_TAG apache-arrow-nanoarrow-0.8.0
        GIT_SHALLOW TRUE
    )
    FetchContent_MakeAvailable(Nanoarrow)
endif()

if (BUILD_TESTS)
enable_language(CXX)
set(CMAKE_CXX_STANDARD 20)
Comment thread
myrrc marked this conversation as resolved.
Expand All @@ -103,3 +115,7 @@ if (BUILD_TESTS)
enable_testing()
add_subdirectory(test)
endif()

# Build the C example programs defined in examples/CMakeLists.txt when the
# BUILD_EXAMPLES option is enabled.
if (BUILD_EXAMPLES)
add_subdirectory(examples)
endif()
72 changes: 32 additions & 40 deletions vortex-ffi/README.md
Original file line number Diff line number Diff line change
@@ -1,58 +1,50 @@
# Foreign Function Interface
# Vortex C interface
## Usage from a CMake project

Vortex is a file format that can be used by any execution engine. Nearly every programming language supports
the C ABI (Application Binary Interface), so by providing an FFI interface to work with Vortex objects we can
make it easy to support a variety of languages.

Check out the [`examples`](./examples/) directory to see an example of how to use the API to build
a real native application.

## Design

The FFI is designed to be very simple and follows a very object-oriented approach:

- **Constructors** are simple C functions that return opaque pointers
- **Methods** are functions that receive an opaque pointer as the first argument, followed by subsequent arguments.
Methods may return a value or void.
- **Destructors** free native resources (allocations, file handles, network sockets) and must be explicitly called by
the foreign language to avoid leaking resources.

Constructors will generally allocate rust memory, and destructors free that memory.

## Documentation

The FFI API is documented in `docs/api/c` with explicit inclusion of types, enums, and functions, etc. Note that an
item cannot be referenced in the documentation if it does not have a documentation comment.
```
# in vortex folder
cargo build --release -p vortex-ffi

## Updating Headers
# in your CMakeLists.txt
add_subdirectory(vortex/vortex-ffi)
target_link_libraries(my_target PRIVATE vortex_ffi_shared)
# or target_link_libraries(my_target PRIVATE vortex_ffi)
```

To rebuild the header file:
## Running C examples:

```sh
cargo +nightly build -p vortex-ffi
cmake -Bbuild -DBUILD_EXAMPLES=1
cmake --build build
./build/examples/dtype
Comment thread
myrrc marked this conversation as resolved.
./build/examples/scan
./build/examples/scan_to_arrow
./build/examples/write_sample
```

The header generation uses cbindgen's macro expansion feature which requires nightly.
Stable builds use the checked-in header file at `cinclude/vortex.h`.
## Updating Headers

If you're developing FFI and want to rebuild `cinclude/vortex.h`, run
`cargo +nightly build -p vortex-ffi`.

### Testing C part
## Testing C part

Build the test library
Build the test library:

```sh
cmake -Bbuild
cmake --build build -j $(nproc)
cmake -Bbuild -DBUILD_TESTS=1
cmake --build build
```

Run the tests
Run the tests:

```sh
ctest --test-dir build -j $(nproc)
```

You would need C++ compiler toolchain to run the tests since they use Catch2.
You will need a C++ compiler toolchain to run the tests since they use Catch2.

### Testing Rust part with sanitizers
## Testing Rust part with sanitizers

AddressSanitizer:

Expand Down Expand Up @@ -90,20 +82,20 @@ with sanitizers.
- `allow-abi-mismatch` is safe because in our dependency graph only crates like
`compiler_builtins` unset sanitization, and they do it on purpose.
- Make sure to use `cargo test` and not `cargo nextest` as nextest reports less
leaks.
leaks.
- If you want stack trace symbolization, install `llvm-symbolizer`.

### Testing Rust and C with sanitizers
## Testing Rust and C with sanitizers

1. Build FFI library with external sanitizer runtime:

```sh
RUSTFLAGS="-Zsanitizer=address -Zexternal-clangrt" \
cargo +nightly build -Zbuild-std --target=<target triple> \
--no-default-features -p vortex-ffi
--no-default-features -p vortex-ffi
```

2. Build tests with target triple
2. Build tests with target triple:

```sh
cmake -Bbuild -DWITH_ASAN=1 -DTARGET_TRIPLE=<target triple>
Expand Down
13 changes: 9 additions & 4 deletions vortex-ffi/cbindgen.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,15 @@ header = """
//

// https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions
// We don't want to bundle nanoarrow or similar just for these two definitions.
// If you use your own Arrow library, define this macro and
// typedef FFI_ArrowSchema ArrowSchema;
// typedef FFI_ArrowArrayStream ArrowArrayStream;
// If you want to use your own Arrow library like nanoarrow, define this macro
// and typedef your types:
//
// #include "nanoarrow/common/inline_types.h"
// #define USE_OWN_ARROW
// typedef struct ArrowSchema FFI_ArrowSchema;
// typedef struct ArrowArrayStream FFI_ArrowArrayStream;
// #include "vortex.h"
//
#ifndef USE_OWN_ARROW
struct ArrowSchema {
const char* format;
Expand Down
86 changes: 46 additions & 40 deletions vortex-ffi/cinclude/vortex.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,15 @@
//

// https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions
// We don't want to bundle nanoarrow or similar just for these two definitions.
// If you use your own Arrow library, define this macro and
// typedef FFI_ArrowSchema ArrowSchema;
// typedef FFI_ArrowArrayStream ArrowArrayStream;
// If you want to use your own Arrow library like nanoarrow, define this macro
// and typedef your types:
//
// #include "nanoarrow/common/inline_types.h"
// #define USE_OWN_ARROW
// typedef struct ArrowSchema FFI_ArrowSchema;
// typedef struct ArrowArrayStream FFI_ArrowArrayStream;
// #include "vortex.h"
//
#ifndef USE_OWN_ARROW
struct ArrowSchema {
const char *format;
Expand Down Expand Up @@ -175,10 +180,19 @@ typedef enum {
} vx_validity_type;

typedef enum {
VX_CARD_UNKNOWN = 0,
VX_CARD_ESTIMATE = 1,
VX_CARD_MAXIMUM = 2,
} vx_cardinality;
/**
* No estimate is available.
*/
VX_ESTIMATE_UNKNOWN = 0,
/**
* The value in vx_estimate.estimate is exact.
*/
VX_ESTIMATE_EXACT = 1,
/**
* The value in vx_estimate.estimate is an upper bound.
*/
VX_ESTIMATE_INEXACT = 2,
} vx_estimate_type;

/**
* Equalities, inequalities, and boolean operations over possibly null values.
Expand Down Expand Up @@ -282,21 +296,6 @@ typedef enum {
VX_SELECTION_EXCLUDE_RANGE = 2,
} vx_scan_selection_include;

typedef enum {
/**
* No estimate is available.
*/
VX_ESTIMATE_UNKNOWN = 0,
/**
* The value in vx_estimate.estimate is exact.
*/
VX_ESTIMATE_EXACT = 1,
/**
* The value in vx_estimate.estimate is an upper bound.
*/
VX_ESTIMATE_INEXACT = 2,
} vx_estimate_type;

/**
* Physical type enum, represents the in-memory physical layout but might represent a different logical type.
*/
Expand Down Expand Up @@ -490,6 +489,10 @@ typedef struct vx_file vx_file;
*/
typedef struct vx_partition vx_partition;

/**
* A scan is a single traversal of a data source with projections and
* filters. A scan can be consumed only once.
*/
typedef struct vx_scan vx_scan;

/**
Expand Down Expand Up @@ -537,13 +540,17 @@ typedef struct {
const char *paths;
} vx_data_source_options;

/**
* Used for estimating number of partitions in a data source or number of rows
* in a partition.
*/
typedef struct {
vx_cardinality cardinality;
vx_estimate_type type;
/**
* Set only when "cardinality" is not VX_CARD_UNKNOWN
* Set only when "type" is not VX_ESTIMATE_UNKNOWN.
*/
uint64_t rows;
} vx_data_source_row_count;
uint64_t estimate;
} vx_estimate;

/**
* Options supplied for opening a file.
Expand Down Expand Up @@ -662,18 +669,6 @@ typedef struct {
bool ordered;
} vx_scan_options;

/**
* Used for estimating number of partitions in a data source or number of rows
* in a partition.
*/
typedef struct {
vx_estimate_type type;
/**
* Set only when "type" is not VX_ESTIMATE_UNKNOWN.
*/
uint64_t estimate;
} vx_estimate;

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
Expand Down Expand Up @@ -921,7 +916,7 @@ const vx_dtype *vx_data_source_dtype(const vx_data_source *ds);
/**
* Write data source's row count estimate into "row_count".
*/
void vx_data_source_get_row_count(const vx_data_source *ds, vx_data_source_row_count *row_count);
void vx_data_source_get_row_count(const vx_data_source *ds, vx_estimate *row_count);

/**
* Clone a borrowed [`vx_dtype`], returning an owned [`vx_dtype`].
Expand Down Expand Up @@ -1319,6 +1314,17 @@ vx_partition *vx_scan_next_partition(vx_scan *scan, vx_error **err);
*/
int vx_partition_row_count(const vx_partition *partition, vx_estimate *count, vx_error **err);

/**
* Scan partition to ArrowArrayStream.
* Consumes partition fully: subsequent calls to vx_partition_scan_arrow or
* vx_partition_next are undefined behaviour.
* This call blocks current thread until underlying stream is fully consumed.
*
* Caller must not free partition after calling this function.
*
* On success, sets "stream" and returns 0.
* On error, sets "err" and returns 1, freeing the partition.
*/
int vx_partition_scan_arrow(const vx_session *session,
vx_partition *partition,
FFI_ArrowArrayStream *stream,
Expand Down
18 changes: 18 additions & 0 deletions vortex-ffi/examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# SPDX-License-Identifier: CC-BY-4.0
# SPDX-FileCopyrightText: Copyright the Vortex contributors

# CMP0079 (introduced in CMake 3.13) controls calling target_link_libraries()
# on targets that were created in other directories. Set it only when the
# running CMake knows the policy, so older versions do not fail on an unknown
# policy name.
if (POLICY CMP0079)
    cmake_policy(SET CMP0079 NEW)
endif()

# Each example is a single-source C executable linked against the Vortex FFI
# shared library.
add_executable(scan scan.c)
target_link_libraries(scan PRIVATE vortex_ffi_shared)

# scan_to_arrow additionally links nanoarrow.
add_executable(scan_to_arrow scan_to_arrow.c)
target_link_libraries(scan_to_arrow PRIVATE
    nanoarrow_shared
    vortex_ffi_shared)

add_executable(dtype dtype.c)
target_link_libraries(dtype PRIVATE vortex_ffi_shared)

add_executable(write_sample write_sample.c)
target_link_libraries(write_sample PRIVATE vortex_ffi_shared)
Loading
Loading