Skip to content

Commit 2ebedaf

Browse files
committed
initial
Signed-off-by: Mikhail Kot <to@myrrc.dev>
1 parent 452a4a3 commit 2ebedaf

17 files changed

Lines changed: 800 additions & 364 deletions

File tree

.github/workflows/rust-instrumented.yml

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -187,15 +187,23 @@ jobs:
187187
cargo +$NIGHTLY_TOOLCHAIN build --locked --no-default-features \
188188
--target x86_64-unknown-linux-gnu -Zbuild-std \
189189
-p vortex-ffi
190-
- name: Build FFI library tests
190+
- name: Build FFI library tests and examples
191191
run: |
192192
cd vortex-ffi
193-
cmake -Bbuild -DBUILD_TESTS=1 -DSANITIZER=${{ matrix.sanitizer }} -DTARGET_TRIPLE="x86_64-unknown-linux-gnu"
193+
cmake -Bbuild -DBUILD_TESTS=1 -DBUILD_EXAMPLES=1 -DSANITIZER=${{ matrix.sanitizer }} -DTARGET_TRIPLE="x86_64-unknown-linux-gnu"
194194
cmake --build build -j
195195
- name: Run tests
196196
run: |
197197
set -o pipefail
198-
./vortex-ffi/build/test/vortex_ffi_test 2>&1 | rustfilt -i-
198+
./vortex-ffi/build/test/vortex_ffi_test 2>&1 | rustfilt
199+
- name: Run examples
200+
run: |
201+
set -o pipefail
202+
./vortex-ffi/build/examples/write_sample file.vortex 2>&1 | rustfilt
203+
./vortex-ffi/build/examples/write_sample file2.vortex 2>&1 | rustfilt
204+
./vortex-ffi/build/examples/dtype '*.vortex' 2>&1 | rustfilt
205+
./vortex-ffi/build/examples/scan '*.vortex' 2>&1 | rustfilt
206+
./vortex-ffi/build/examples/scan_to_arrow '*.vortex' 2>&1 | rustfilt
199207
200208
miri:
201209
name: "Rust tests (miri)"

vortex-ffi/CMakeLists.txt

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
# SPDX-FileCopyrightText: Copyright the Vortex contributors
33
cmake_minimum_required(VERSION 3.10)
44

5+
include(FetchContent)
6+
57
project(VortexFFI
68
VERSION 0.0.1
79
LANGUAGES C)
@@ -10,6 +12,7 @@ set(CMAKE_C_STANDARD 17)
1012
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Werror -Wextra -Wpedantic")
1113

1214
option(BUILD_TESTS "Build tests" OFF)
15+
option(BUILD_EXAMPLES "Build examples" OFF)
1316

1417
set(SANITIZER "" CACHE STRING "Build with sanitizers")
1518
set(TARGET_TRIPLE "" CACHE STRING "Rust target triple for FFI library")
@@ -76,10 +79,10 @@ Static library path ${LIBRARY_PATH}
7679
Headers path ${LIBRARY_HEADERS}")
7780

7881
if (NOT EXISTS "${LIBRARY_PATH_SHARED}")
79-
message(FATAL_ERROR "Shared library not found")
82+
message(FATAL_ERROR "Shared library not found, run `cargo build --release -p vortex-ffi`")
8083
endif()
8184
if (NOT EXISTS "${LIBRARY_PATH}")
82-
message(FATAL_ERROR "Static library not found")
85+
message(FATAL_ERROR "Static library not found, run `cargo build --release -p vortex-ffi`")
8386
endif()
8487

8588
add_library(vortex_ffi STATIC IMPORTED)
@@ -95,6 +98,15 @@ set_target_properties(vortex_ffi_shared PROPERTIES
9598
INTERFACE_LINK_OPTIONS "LINKER:-rpath,${LIBRARY_DIR}"
9699
)
97100

101+
if (BUILD_TESTS OR BUILD_EXAMPLES)
102+
FetchContent_Declare(
103+
Nanoarrow
104+
GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow
105+
GIT_TAG apache-arrow-nanoarrow-0.8.0
106+
)
107+
FetchContent_MakeAvailable(Nanoarrow)
108+
endif()
109+
98110
if (BUILD_TESTS)
99111
enable_language(CXX)
100112
set(CMAKE_CXX_STANDARD 20)
@@ -103,3 +115,7 @@ if (BUILD_TESTS)
103115
enable_testing()
104116
add_subdirectory(test)
105117
endif()
118+
119+
if (BUILD_EXAMPLES)
120+
add_subdirectory(examples)
121+
endif()

vortex-ffi/README.md

Lines changed: 32 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,50 @@
1-
# Foreign Function Interface
1+
# Vortex C interface
2+
## Usage from a CMake project
23

3-
Vortex is a file format that can be used by any execution engine. Nearly every programming language supports
4-
the C ABI (Application Binary Interface), so by providing an FFI interface to work with Vortex objects we can
5-
make it easy to support a variety of languages.
6-
7-
Check out the [`examples`](./examples/) directory to see an example of how to use the API to build
8-
a real native application.
9-
10-
## Design
11-
12-
The FFI is designed to be very simple and follows a very object-oriented approach:
13-
14-
- **Constructors** are simple C functions that return opaque pointers
15-
- **Methods** are functions that receive an opaque pointer as the first argument, followed by subsequent arguments.
16-
Methods may return a value or void.
17-
- **Destructors** free native resources (allocations, file handles, network sockets) and must be explicitly called by
18-
the foreign language to avoid leaking resources.
19-
20-
Constructors will generally allocate rust memory, and destructors free that memory.
21-
22-
## Documentation
23-
24-
The FFI API is documented in `docs/api/c` with explicit inclusion of types, enums, and functions, etc. Note that an
25-
item cannot be referenced in the documentation if it does not have a documentation comment.
4+
```
5+
# in vortex folder
6+
cargo build --release -p vortex-ffi
267
27-
## Updating Headers
8+
# in your CMakeLists.txt
9+
include_directory(vortex/vortex-ffi)
10+
target_link_libraries(my_target PRIVATE vortex_ffi_shared)
11+
# or target_link_libraries(my_target PRIVATE vortex_ffi)
12+
```
2813

29-
To rebuild the header file:
14+
## Running C examples
3015

3116
```sh
32-
cargo +nightly build -p vortex-ffi
17+
cmake -Bbuild -DBUILD_EXAMPLES=1
18+
cmake --build build
19+
./build/examples/dtype
20+
./build/examples/scan
21+
./build/examples/scan_to_arrow
22+
./build/examples/write_sample
3323
```
3424

35-
The header generation uses cbindgen's macro expansion feature which requires nightly.
36-
Stable builds use the checked-in header file at `cinclude/vortex.h`.
25+
## Updating Headers
26+
27+
If you're developing FFI and want to rebuild `cinclude/vortex.h`, run
28+
`cargo +nightly build -p vortex-ffi`.
3729

38-
### Testing C part
30+
## Testing C part
3931

40-
Build the test library
32+
Build the test library:
4133

4234
```sh
43-
cmake -Bbuild
44-
cmake --build build -j $(nproc)
35+
cmake -Bbuild -DBUILD_TESTS=1
36+
cmake --build build
4537
```
4638

47-
Run the tests
39+
Run the tests:
4840

4941
```sh
5042
ctest --test-dir build -j $(nproc)
5143
```
5244

53-
You would need C++ compiler toolchain to run the tests since they use Catch2.
45+
You will need a C++ compiler toolchain to run the tests since they use Catch2.
5446

55-
### Testing Rust part with sanitizers
47+
## Testing Rust part with sanitizers
5648

5749
AddressSanitizer:
5850

@@ -90,20 +82,20 @@ with sanitizers.
9082
- `allow-abi-mismatch` is safe because in our dependency graph only crates like
9183
`compiler_builtins` unset sanitization, and they do it on purpose.
9284
- Make sure to use `cargo test` and not `cargo nextest` as nextest reports fewer
93-
leaks.
85+
leaks.
9486
- If you want stack trace symbolization, install `llvm-symbolizer`.
9587

96-
### Testing Rust and C with sanitizers
88+
## Testing Rust and C with sanitizers
9789

9890
1. Build FFI library with external sanitizer runtime:
9991

10092
```sh
10193
RUSTFLAGS="-Zsanitizer=address -Zexternal-clangrt" \
10294
cargo +nightly build -Zbuild-std --target=<target triple> \
103-
--no-default-features -p vortex-ffi
95+
--no-default-features -p vortex-ffi
10496
```
10597

106-
2. Build tests with target triple
98+
2. Build tests with target triple:
10799

108100
```sh
109101
cmake -Bbuild -DWITH_ASAN=1 -DTARGET_TRIPLE=<target triple>

vortex-ffi/cbindgen.toml

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,15 @@ header = """
1616
//
1717
1818
// https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions
19-
// We don't want to bundle nanoarrow or similar just for these two definitions.
20-
// If you use your own Arrow library, define this macro and
21-
// typedef FFI_ArrowSchema ArrowSchema;
22-
// typedef FFI_ArrowArrayStream ArrowArrayStream;
19+
// If you want to use your own Arrow library like nanoarrow, define this macro
20+
// and typedef your types:
21+
//
22+
// #include "nanoarrow/common/inline_types.h"
23+
// #define USE_OWN_ARROW
24+
// typedef struct ArrowSchema FFI_ArrowSchema;
25+
// typedef struct ArrowArrayStream FFI_ArrowArrayStream;
26+
// #include "vortex.h"
27+
//
2328
#ifndef USE_OWN_ARROW
2429
struct ArrowSchema {
2530
const char* format;

vortex-ffi/cinclude/vortex.h

Lines changed: 46 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,15 @@
88
//
99

1010
// https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions
11-
// We don't want to bundle nanoarrow or similar just for these two definitions.
12-
// If you use your own Arrow library, define this macro and
13-
// typedef FFI_ArrowSchema ArrowSchema;
14-
// typedef FFI_ArrowArrayStream ArrowArrayStream;
11+
// If you want to use your own Arrow library like nanoarrow, define this macro
12+
// and typedef your types:
13+
//
14+
// #include "nanoarrow/common/inline_types.h"
15+
// #define USE_OWN_ARROW
16+
// typedef struct ArrowSchema FFI_ArrowSchema;
17+
// typedef struct ArrowArrayStream FFI_ArrowArrayStream;
18+
// #include "vortex.h"
19+
//
1520
#ifndef USE_OWN_ARROW
1621
struct ArrowSchema {
1722
const char *format;
@@ -175,10 +180,19 @@ typedef enum {
175180
} vx_validity_type;
176181

177182
typedef enum {
178-
VX_CARD_UNKNOWN = 0,
179-
VX_CARD_ESTIMATE = 1,
180-
VX_CARD_MAXIMUM = 2,
181-
} vx_cardinality;
183+
/**
184+
* No estimate is available.
185+
*/
186+
VX_ESTIMATE_UNKNOWN = 0,
187+
/**
188+
* The value in vx_estimate.estimate is exact.
189+
*/
190+
VX_ESTIMATE_EXACT = 1,
191+
/**
192+
* The value in vx_estimate.estimate is an upper bound.
193+
*/
194+
VX_ESTIMATE_INEXACT = 2,
195+
} vx_estimate_type;
182196

183197
/**
184198
* Equalities, inequalities, and boolean operations over possibly null values.
@@ -282,21 +296,6 @@ typedef enum {
282296
VX_SELECTION_EXCLUDE_RANGE = 2,
283297
} vx_scan_selection_include;
284298

285-
typedef enum {
286-
/**
287-
* No estimate is available.
288-
*/
289-
VX_ESTIMATE_UNKNOWN = 0,
290-
/**
291-
* The value in vx_estimate.estimate is exact.
292-
*/
293-
VX_ESTIMATE_EXACT = 1,
294-
/**
295-
* The value in vx_estimate.estimate is an upper bound.
296-
*/
297-
VX_ESTIMATE_INEXACT = 2,
298-
} vx_estimate_type;
299-
300299
/**
301300
* Physical type enum, represents the in-memory physical layout but might represent a different logical type.
302301
*/
@@ -490,6 +489,10 @@ typedef struct vx_file vx_file;
490489
*/
491490
typedef struct vx_partition vx_partition;
492491

492+
/**
493+
* A scan is a single traversal of a data source with projections and
494+
* filters. A scan can be consumed only once.
495+
*/
493496
typedef struct vx_scan vx_scan;
494497

495498
/**
@@ -537,13 +540,17 @@ typedef struct {
537540
const char *paths;
538541
} vx_data_source_options;
539542

543+
/**
544+
* Used for estimating number of partitions in a data source or number of rows
545+
* in a partition.
546+
*/
540547
typedef struct {
541-
vx_cardinality cardinality;
548+
vx_estimate_type type;
542549
/**
543-
* Set only when "cardinality" is not VX_CARD_UNKNOWN
550+
* Set only when "type" is not VX_ESTIMATE_UNKNOWN.
544551
*/
545-
uint64_t rows;
546-
} vx_data_source_row_count;
552+
uint64_t estimate;
553+
} vx_estimate;
547554

548555
/**
549556
* Options supplied for opening a file.
@@ -662,18 +669,6 @@ typedef struct {
662669
bool ordered;
663670
} vx_scan_options;
664671

665-
/**
666-
* Used for estimating number of partitions in a data source or number of rows
667-
* in a partition.
668-
*/
669-
typedef struct {
670-
vx_estimate_type type;
671-
/**
672-
* Set only when "type" is not VX_ESTIMATE_UNKNOWN.
673-
*/
674-
uint64_t estimate;
675-
} vx_estimate;
676-
677672
#ifdef __cplusplus
678673
extern "C" {
679674
#endif // __cplusplus
@@ -921,7 +916,7 @@ const vx_dtype *vx_data_source_dtype(const vx_data_source *ds);
921916
/**
922917
* Write data source's row count estimate into "row_count".
923918
*/
924-
void vx_data_source_get_row_count(const vx_data_source *ds, vx_data_source_row_count *row_count);
919+
void vx_data_source_get_row_count(const vx_data_source *ds, vx_estimate *row_count);
925920

926921
/**
927922
* Clone a borrowed [`vx_dtype`], returning an owned [`vx_dtype`].
@@ -1319,6 +1314,17 @@ vx_partition *vx_scan_next_partition(vx_scan *scan, vx_error **err);
13191314
*/
13201315
int vx_partition_row_count(const vx_partition *partition, vx_estimate *count, vx_error **err);
13211316

1317+
/**
1318+
* Scan partition to ArrowArrayStream.
1319+
* Consumes partition fully: subsequent calls to vx_partition_scan_arrow or
1320+
* vx_partition_next are undefined behaviour.
1321+
* This call blocks current thread until underlying stream is fully consumed.
1322+
*
1323+
* Caller must not free partition after calling this function.
1324+
*
1325+
* On success, sets "stream" and returns 0.
1326+
* On error, sets "err" and returns 1, freeing the partition.
1327+
*/
13221328
int vx_partition_scan_arrow(const vx_session *session,
13231329
vx_partition *partition,
13241330
FFI_ArrowArrayStream *stream,

vortex-ffi/examples/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# SPDX-License-Identifier: CC-BY-4.0
2+
# SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
# allow linking with vortex_ffi_shared although it's not in current folder
5+
cmake_policy(SET CMP0079 NEW)
6+
7+
add_executable(scan scan.c)
8+
target_link_libraries(scan PRIVATE vortex_ffi_shared)
9+
10+
add_executable(scan_to_arrow scan_to_arrow.c)
11+
target_link_libraries(scan_to_arrow PRIVATE
12+
nanoarrow_shared vortex_ffi_shared)
13+
14+
add_executable(dtype dtype.c)
15+
target_link_libraries(dtype PRIVATE vortex_ffi_shared)
16+
17+
add_executable(write_sample write_sample.c)
18+
target_link_libraries(write_sample PRIVATE vortex_ffi_shared)

0 commit comments

Comments
 (0)