Skip to content

Commit bd2fc20

Browse files
authored
Derive host metadata from exported schema (#6)
Signed-off-by: Alexander Droste <alexander.droste@protonmail.com>
1 parent 540cd40 commit bd2fc20

3 files changed

Lines changed: 23 additions & 14 deletions

File tree

.github/workflows/ci.yml

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ permissions:
1313
jobs:
1414
build:
1515
name: Build
16-
runs-on: runs-on=${{ github.run_id }}/runner=gpu/tag=cudf-test-harness
16+
runs-on: runs-on=${{ github.run_id }}/family=g5+g4dn+g6.*/cpu=8/image=ubuntu24-gpu-x64/spot=true/extras=s3-cache/tag=cudf-test-harness
1717
container:
1818
image: nvidia/cuda:13.0.0-devel-ubuntu24.04
1919

@@ -58,7 +58,8 @@ jobs:
5858
- name: Build
5959
run: |
6060
cmake -S . -B build -G Ninja \
61-
-DCMAKE_BUILD_TYPE=Release \
61+
-DCMAKE_BUILD_TYPE=Debug \
62+
-DCMAKE_CUDA_ARCHITECTURES=90-virtual \
6263
-DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
6364
-DCMAKE_C_COMPILER_LAUNCHER=sccache \
6465
-DCMAKE_CXX_COMPILER_LAUNCHER=sccache \

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ FetchContent_Declare(
3737
GIT_REPOSITORY https://github.com/rapidsai/cudf.git
3838
# rapidsai/cudf#22620: fix Arrow Device string-view imports with producer-owned
3939
# ArrowArray.private_data.
40-
GIT_TAG 2a2b126f870bb1d1fdad62ac1c726ccad6001ec6
40+
GIT_TAG 3ad0f0c9aeed6e5a521b07587e97c58943a714f7
4141
SOURCE_SUBDIR cpp
4242
)
4343
FetchContent_MakeAvailable(cudf)

src/main.cpp

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,9 @@
22
#include <dlfcn.h>
33
#include <iostream>
44
#include <memory>
5+
#include <stdexcept>
56
#include <string>
7+
#include <vector>
68

79
#include <cudf/column/column.hpp>
810
#include <cudf/column/column_view.hpp>
@@ -152,27 +154,33 @@ int run_check(const char *library_path) {
152154
}
153155
}
154156

155-
// Convert to cuDF column view using from_arrow_device_column
156-
std::cout << "\nConverting to cuDF column view...\n";
157+
std::cout << "\nConverting to cuDF table view...\n";
157158
try {
158159
auto table = cudf::from_arrow_device(&schema, &device_array);
159160

160161
std::cout << "Conversion successful!\n\n";
161162

162-
// convert to host array, call the verifier inside of the shared library.
163+
// Round-trip to host Arrow and validate in the producer library.
163164
auto host_array = cudf::to_arrow_host(*table);
164-
auto host_metadata = std::vector<cudf::column_metadata>{
165-
cudf::column_metadata{"prims"},
166-
cudf::column_metadata{"decimals"},
167-
cudf::column_metadata{"strings"},
168-
cudf::column_metadata{"dates"},
169-
};
165+
auto host_metadata = std::vector<cudf::column_metadata>{};
166+
auto const num_columns = table->num_columns();
167+
if (schema.n_children != num_columns || schema.children == nullptr) {
168+
throw std::runtime_error("exported schema does not match imported table");
169+
}
170+
host_metadata.reserve(num_columns);
171+
for (auto i = 0; i < num_columns; ++i) {
172+
auto const child_schema = schema.children[i];
173+
if (child_schema == nullptr || child_schema->name == nullptr) {
174+
throw std::runtime_error("exported schema child is missing a name");
175+
}
176+
host_metadata.push_back(cudf::column_metadata{child_schema->name});
177+
}
170178
auto host_schema = cudf::to_arrow_schema(*table, host_metadata);
171179
if (validate_array(host_schema.get(), &host_array->array) != 0) {
172-
std::cerr << "\nValidation failed!\n";
180+
throw std::runtime_error("validation failed");
173181
}
174182
} catch (const std::exception &e) {
175-
std::cerr << "Error: Failed to convert Arrow array to cuDF column: " << e.what() << "\n";
183+
std::cerr << "Error: Failed to validate Arrow device export with cuDF: " << e.what() << "\n";
176184

177185
// Release the Arrow array
178186
if (device_array.array.release) {

0 commit comments

Comments
 (0)