|
8 | 8 | // |
9 | 9 |
|
10 | 10 | // https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions |
11 | | -// We don't want to bundle nanoarrow or similar just for these two definitions. |
12 | | -// If you use your own Arrow library, define this macro and |
13 | | -// typedef FFI_ArrowSchema ArrowSchema; |
14 | | -// typedef FFI_ArrowArrayStream ArrowArrayStream; |
| 11 | +// If you want to use your own Arrow library, such as nanoarrow, define the |
| 12 | +// USE_OWN_ARROW macro and typedef your types: |
| 13 | +// |
| 14 | +// #include "nanoarrow/common/inline_types.h" |
| 15 | +// #define USE_OWN_ARROW |
| 16 | +// typedef struct ArrowSchema FFI_ArrowSchema; |
| 17 | +// typedef struct ArrowArrayStream FFI_ArrowArrayStream; |
| 18 | +// #include "vortex.h" |
| 19 | +// |
15 | 20 | #ifndef USE_OWN_ARROW |
16 | 21 | struct ArrowSchema { |
17 | 22 | const char *format; |
@@ -175,10 +180,19 @@ typedef enum { |
175 | 180 | } vx_validity_type; |
176 | 181 |
|
177 | 182 | typedef enum { |
178 | | - VX_CARD_UNKNOWN = 0, |
179 | | - VX_CARD_ESTIMATE = 1, |
180 | | - VX_CARD_MAXIMUM = 2, |
181 | | -} vx_cardinality; |
| 183 | + /** |
| 184 | + * No estimate is available. |
| 185 | + */ |
| 186 | + VX_ESTIMATE_UNKNOWN = 0, |
| 187 | + /** |
| 188 | + * The value in vx_estimate.estimate is exact. |
| 189 | + */ |
| 190 | + VX_ESTIMATE_EXACT = 1, |
| 191 | + /** |
| 192 | + * The value in vx_estimate.estimate is an upper bound. |
| 193 | + */ |
| 194 | + VX_ESTIMATE_INEXACT = 2, |
| 195 | +} vx_estimate_type; |
182 | 196 |
|
183 | 197 | /** |
184 | 198 | * Equalities, inequalities, and boolean operations over possibly null values. |
@@ -282,21 +296,6 @@ typedef enum { |
282 | 296 | VX_SELECTION_EXCLUDE_RANGE = 2, |
283 | 297 | } vx_scan_selection_include; |
284 | 298 |
|
285 | | -typedef enum { |
286 | | - /** |
287 | | - * No estimate is available. |
288 | | - */ |
289 | | - VX_ESTIMATE_UNKNOWN = 0, |
290 | | - /** |
291 | | - * The value in vx_estimate.estimate is exact. |
292 | | - */ |
293 | | - VX_ESTIMATE_EXACT = 1, |
294 | | - /** |
295 | | - * The value in vx_estimate.estimate is an upper bound. |
296 | | - */ |
297 | | - VX_ESTIMATE_INEXACT = 2, |
298 | | -} vx_estimate_type; |
299 | | - |
300 | 299 | /** |
301 | 300 | * Physical type enum, represents the in-memory physical layout but might represent a different logical type. |
302 | 301 | */ |
@@ -490,6 +489,10 @@ typedef struct vx_file vx_file; |
490 | 489 | */ |
491 | 490 | typedef struct vx_partition vx_partition; |
492 | 491 |
|
| 492 | +/** |
| 493 | + * A scan is a single traversal of a data source with projections and |
| 494 | + * filters. A scan can be consumed only once. |
| 495 | + */ |
493 | 496 | typedef struct vx_scan vx_scan; |
494 | 497 |
|
495 | 498 | /** |
@@ -537,13 +540,17 @@ typedef struct { |
537 | 540 | const char *paths; |
538 | 541 | } vx_data_source_options; |
539 | 542 |
|
| 543 | +/** |
| 544 | + * Used for estimating number of partitions in a data source or number of rows |
| 545 | + * in a partition. |
| 546 | + */ |
540 | 547 | typedef struct { |
541 | | - vx_cardinality cardinality; |
| 548 | + vx_estimate_type type; |
542 | 549 | /** |
543 | | - * Set only when "cardinality" is not VX_CARD_UNKNOWN |
| 550 | + * Set only when "type" is not VX_ESTIMATE_UNKNOWN. |
544 | 551 | */ |
545 | | - uint64_t rows; |
546 | | -} vx_data_source_row_count; |
| 552 | + uint64_t estimate; |
| 553 | +} vx_estimate; |
547 | 554 |
|
548 | 555 | /** |
549 | 556 | * Options supplied for opening a file. |
@@ -662,18 +669,6 @@ typedef struct { |
662 | 669 | bool ordered; |
663 | 670 | } vx_scan_options; |
664 | 671 |
|
665 | | -/** |
666 | | - * Used for estimating number of partitions in a data source or number of rows |
667 | | - * in a partition. |
668 | | - */ |
669 | | -typedef struct { |
670 | | - vx_estimate_type type; |
671 | | - /** |
672 | | - * Set only when "type" is not VX_ESTIMATE_UNKNOWN. |
673 | | - */ |
674 | | - uint64_t estimate; |
675 | | -} vx_estimate; |
676 | | - |
677 | 672 | #ifdef __cplusplus |
678 | 673 | extern "C" { |
679 | 674 | #endif // __cplusplus |
@@ -921,7 +916,7 @@ const vx_dtype *vx_data_source_dtype(const vx_data_source *ds); |
921 | 916 | /** |
922 | 917 | * Write data source's row count estimate into "row_count". |
923 | 918 | */ |
924 | | -void vx_data_source_get_row_count(const vx_data_source *ds, vx_data_source_row_count *row_count); |
| 919 | +void vx_data_source_get_row_count(const vx_data_source *ds, vx_estimate *row_count); |
925 | 920 |
|
926 | 921 | /** |
927 | 922 | * Clone a borrowed [`vx_dtype`], returning an owned [`vx_dtype`]. |
@@ -1319,6 +1314,17 @@ vx_partition *vx_scan_next_partition(vx_scan *scan, vx_error **err); |
1319 | 1314 | */ |
1320 | 1315 | int vx_partition_row_count(const vx_partition *partition, vx_estimate *count, vx_error **err); |
1321 | 1316 |
|
| 1317 | +/** |
| 1318 | + * Scan a partition into an ArrowArrayStream. |
| 1319 | + * Consumes the partition fully: subsequent calls to vx_partition_scan_arrow or |
| 1320 | + * vx_partition_next on it are undefined behaviour. |
| 1321 | + * This call blocks the current thread until the underlying stream is fully consumed. |
| 1322 | + * |
| 1323 | + * Caller must not free partition after calling this function. |
| 1324 | + * |
| 1325 | + * On success, sets "stream" and returns 0. |
| 1326 | + * On error, sets "err" and returns 1, freeing the partition. |
| 1327 | + */ |
1322 | 1328 | int vx_partition_scan_arrow(const vx_session *session, |
1323 | 1329 | vx_partition *partition, |
1324 | 1330 | FFI_ArrowArrayStream *stream, |
|
0 commit comments