Skip to content

Commit 66c090b

Browse files
Prepare stream testing job for static cudart
Rather than using `LD_PRELOAD` to swap in our redirects at runtime, rewrite the ELF symbols at compile time.
1 parent 4c281e3 commit 66c090b

7 files changed

Lines changed: 149 additions & 72 deletions

File tree

cpp/CMakeLists.txt

Lines changed: 36 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -61,16 +61,8 @@ option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compila
6161
# cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking
6262
option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
6363

64-
set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON)
65-
66-
if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS)
67-
set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL OFF)
68-
endif()
69-
70-
option(
71-
CUDF_BUILD_STREAMS_TEST_UTIL
72-
"Whether to build the utilities for stream testing contained in libcudf"
73-
${DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL}
64+
option(CUDF_BUILD_STREAMS_TEST_UTIL
65+
"Whether to build the utilities for stream testing contained in libcudf" OFF
7466
)
7567
mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL)
7668
option(CUDF_CLANG_TIDY "Enable clang-tidy during compilation" OFF)
@@ -1080,8 +1072,12 @@ if(CUDF_BUILD_TESTUTIL)
10801072
)
10811073

10821074
target_link_libraries(
1083-
cudftestutil INTERFACE cuco::cuco Threads::Threads cudf cudftest_default_stream
1084-
$<TARGET_NAME_IF_EXISTS:conda_env>
1075+
cudftestutil
1076+
INTERFACE cuco::cuco
1077+
Threads::Threads
1078+
cudf
1079+
$<$<NOT:$<BOOL:${CUDF_BUILD_STREAMS_TEST_UTIL}>>:cudftest_default_stream>
1080+
$<TARGET_NAME_IF_EXISTS:conda_env>
10851081
)
10861082

10871083
target_include_directories(
@@ -1135,19 +1131,12 @@ endif()
11351131

11361132
# * build cudf_identify_stream_usage --------------------------------------------------------------
11371133
if(CUDF_BUILD_STREAMS_TEST_UTIL)
1138-
if(CUDA_STATIC_RUNTIME)
1139-
message(
1140-
FATAL_ERROR
1141-
"Stream identification cannot be used with a static CUDA runtime. Please set CUDA_STATIC_RUNTIME=OFF or CUDF_BUILD_STREAMS_TEST_UTIL=OFF."
1142-
)
1143-
endif()
1144-
11451134
# Libraries for stream-related testing. We build the library twice, one with STREAM_MODE_TESTING
11461135
# on and one with it set to off. Each test will then be configured to use the appropriate library
11471136
# via ctest, depending on whether it has been updated to expose public stream APIs.
11481137
foreach(_mode cudf testing)
11491138
set(_tgt "cudf_identify_stream_usage_mode_${_mode}")
1150-
add_library(${_tgt} SHARED tests/utilities/identify_stream_usage.cpp)
1139+
add_library(${_tgt} STATIC tests/utilities/identify_stream_usage.cpp)
11511140

11521141
if(CUDF_USE_PER_THREAD_DEFAULT_STREAM)
11531142
target_compile_definitions(
@@ -1168,7 +1157,11 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL)
11681157
${_tgt} PRIVATE "$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>>"
11691158
)
11701159
target_include_directories(${_tgt} PRIVATE "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}/include>")
1171-
target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm)
1160+
target_link_libraries(
1161+
${_tgt}
1162+
PUBLIC $<COMPILE_ONLY:CUDA::cudart> rmm::rmm
1163+
PRIVATE cudf
1164+
)
11721165

11731166
rapids_cuda_set_runtime(${_tgt} USE_STATIC ${CUDA_STATIC_RUNTIME})
11741167
add_library(cudf::${_tgt} ALIAS ${_tgt})
@@ -1177,6 +1170,28 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL)
11771170
target_compile_definitions(${_tgt} PUBLIC STREAM_MODE_TESTING)
11781171
endif()
11791172
endforeach()
1173+
1174+
find_package(Python3 REQUIRED COMPONENTS Interpreter)
1175+
1176+
# Redirect CUDA calls for a target to use our stream testing versions.
#
# Adds a PRE_LINK step to `tgt` that runs cmake/fixup_target_for_stream_testing.py on every object
# file of the target, and appends that script to the target's LINK_DEPENDS property.
#
# Arguments:
#   tgt - name of an existing target whose object files should be processed.
#
# Example:
#   fixup_target_for_stream_testing(cudf)
function(fixup_target_for_stream_testing tgt)
  # Fail with a message that names this helper instead of an error from deeper inside CMake.
  if(NOT TARGET "${tgt}")
    message(FATAL_ERROR "fixup_target_for_stream_testing: '${tgt}' is not a target")
  endif()

  set(fixup_script "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/cmake/fixup_target_for_stream_testing.py")
  add_custom_command(
    TARGET ${tgt}
    PRE_LINK
    COMMAND Python3::Interpreter "${fixup_script}" $<TARGET_OBJECTS:${tgt}>
    VERBATIM COMMAND_EXPAND_LISTS
    COMMENT "Redirecting CUDA calls for ${tgt}"
  )
  # APPEND goes before PROPERTY, as in the documented set_property() signature; everything after
  # the property name is meant to be a value.
  set_property(
    TARGET ${tgt}
    APPEND
    PROPERTY LINK_DEPENDS "${fixup_script}"
  )
endfunction()
1192+
1193+
fixup_target_for_stream_testing(cudf)
1194+
target_compile_definitions(cudf PRIVATE CUDF_BUILD_STREAMS_TEST_UTIL)
11801195
endif()
11811196

11821197
# ##################################################################################################
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import sys
5+
6+
import lief
7+
8+
# This list must be kept in sync with cpp/tests/utilities/identify_stream_usage.cpp
9+
SYMBOLS_TO_REWRITE = [
    "cudaEventRecord",
    "cudaEventRecordWithFlags",
    "cudaLaunchKernel",
    "__cudaLaunchKernel",
    "__cudaLaunchKernel_ptsz",
    "cudaMemPrefetchAsync",
    "cudaMemcpy2DAsync",
    "cudaMemcpy2DFromArrayAsync",
    "cudaMemcpy2DToArrayAsync",
    "cudaMemcpy3DAsync",
    "cudaMemcpy3DPeerAsync",
    "cudaMemcpyAsync",
    "cudaMemcpyFromSymbolAsync",
    "cudaMemcpyToSymbolAsync",
    "cudaMemset2DAsync",
    "cudaMemset3DAsync",
    "cudaMemsetAsync",
    "cudaFreeAsync",
    "cudaMallocAsync",
    "cudaMallocFromPoolAsync",
]


def redirect_symbols(elf):
    """Rename each listed CUDA symbol found in ``elf`` to ``cudf_<name>``.

    Parameters
    ----------
    elf
        Any object with a ``get_symbol(name)`` method that returns a symbol
        with a writable ``name`` attribute, or a falsy value when the symbol
        is absent.

    Returns
    -------
    int
        Number of symbols that were renamed.
    """
    renamed = 0
    for symbol_name in SYMBOLS_TO_REWRITE:
        if symbol := elf.get_symbol(symbol_name):
            symbol.name = f"cudf_{symbol.name}"
            renamed += 1
    return renamed


def main(filenames):
    """Rewrite, in place, every object file named in ``filenames``.

    Raises
    ------
    SystemExit
        If a file cannot be parsed as ELF, so the build step fails with a
        message that names the offending file.
    """
    for filename in filenames:
        elf = lief.ELF.parse(filename)
        if elf is None:
            raise SystemExit(f"{filename}: could not be parsed as an ELF file")

        # Only re-serialise the object when a symbol was actually renamed;
        # files with nothing to redirect are left byte-for-byte untouched.
        if redirect_symbols(elf):
            elf.write(filename)


if __name__ == "__main__":
    main(sys.argv[1:])

cpp/include/cudf_test/default_stream.hpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -17,12 +17,11 @@ namespace test {
1717
*
1818
* The standard behavior of this function is to return cudf's default stream
1919
* (cudf::get_default_stream). This function is primarily provided as an
20-
* overload target for preload libraries (via LD_PRELOAD) so that the default
21-
* stream used for tests may be modified for tracking purposes. All tests of
22-
* public APIs that accept streams should pass `cudf::test::get_default_stream`
23-
* as the stream argument so that a preload library changing the behavior of
24-
* this function will trigger those tests to run on a different stream than
25-
* `cudf::get_default_stream`.
20+
* overload target so that the default stream used for tests may be modified
21+
* for tracking purposes. All tests of public APIs that accept streams should
22+
* pass `cudf::test::get_default_stream` as the stream argument so that a
23+
* library changing the behavior of this function will trigger those tests to
24+
* run on a different stream than `cudf::get_default_stream`.
2625
*
2726
* @return The default stream to use for tests.
2827
*/

cpp/src/utilities/default_stream.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -17,6 +17,18 @@ rmm::cuda_stream_view const default_stream_value{rmm::cuda_stream_per_thread};
1717
rmm::cuda_stream_view const default_stream_value{};
1818
#endif
1919

20+
/**
 * @brief Return the stream selected as the default.
 *
 * The selection is made once, on the first call: when the environment variable
 * `CUDF_PER_THREAD_STREAM` is set (to any value) the result is
 * `rmm::cuda_stream_per_thread`; otherwise it is `detail::default_stream_value`.
 *
 * @return The selected stream view.
 */
rmm::cuda_stream_view const get_default_stream()
{
  // Function-local static: the environment is read only the first time through.
  static rmm::cuda_stream_view const selected_stream =
    (std::getenv("CUDF_PER_THREAD_STREAM") == nullptr) ? detail::default_stream_value
                                                       : rmm::cuda_stream_per_thread;
  return selected_stream;
}
31+
2032
} // namespace detail
2133

2234
/**

cpp/tests/CMakeLists.txt

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,13 @@ function(ConfigureTest CMAKE_TEST_NAME)
7171

7272
if(CUDF_BUILD_STREAMS_TEST_UTIL)
7373
set_tests_properties(
74-
${CMAKE_TEST_NAME}
75-
PROPERTIES
76-
ENVIRONMENT
77-
"GTEST_CUDF_STREAM_MODE=new_${_CUDF_TEST_STREAM_MODE}_default;LD_PRELOAD=$<TARGET_FILE:cudf_identify_stream_usage_mode_${_CUDF_TEST_STREAM_MODE}>"
74+
${CMAKE_TEST_NAME} PROPERTIES ENVIRONMENT
75+
"GTEST_CUDF_STREAM_MODE=new_${_CUDF_TEST_STREAM_MODE}_default"
7876
)
77+
target_link_libraries(
78+
${CMAKE_TEST_NAME} PRIVATE "cudf_identify_stream_usage_mode_${_CUDF_TEST_STREAM_MODE}"
79+
)
80+
fixup_target_for_stream_testing(${CMAKE_TEST_NAME})
7981
endif()
8082
endfunction()
8183

cpp/tests/utilities/identify_stream_usage.cpp

Lines changed: 41 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919
#include <string>
2020
#include <unordered_map>
2121

22-
// This file is compiled into a separate library that is dynamically loaded with LD_PRELOAD at
23-
// runtime to libcudf to override some stream-related symbols in libcudf. The goal of such a library
24-
// is to verify if the stream/stream pool is being correctly forwarded between API calls.
22+
// This file is compiled into a separate library that is statically linked to tests to
23+
// override some stream-related symbols in libcudf. The goal of such a library is to
24+
// verify if the stream/stream pool is being correctly forwarded between API calls.
2525
//
2626
// We control whether to override cudf::test::get_default_stream or
2727
// cudf::get_default_stream with a compile-time flag. The behaviour of tests
@@ -44,19 +44,46 @@
4444

4545
namespace cudf {
4646

47-
#ifdef STREAM_MODE_TESTING
48-
namespace test {
47+
namespace detail {
48+
49+
#if defined(CUDF_USE_PER_THREAD_DEFAULT_STREAM)
50+
rmm::cuda_stream_view const default_stream_value{rmm::cuda_stream_per_thread};
51+
#else
52+
rmm::cuda_stream_view const default_stream_value{};
4953
#endif
5054

55+
} // namespace detail
56+
5157
rmm::cuda_stream_view const get_default_stream()
5258
{
59+
#ifdef STREAM_MODE_TESTING
60+
static auto const default_stream = []() {
61+
if (std::getenv("CUDF_PER_THREAD_STREAM") != nullptr) {
62+
return rmm::cuda_stream_per_thread;
63+
} else {
64+
return detail::default_stream_value;
65+
}
66+
}();
67+
return default_stream;
68+
#else
5369
static rmm::cuda_stream stream{};
5470
return stream;
71+
#endif
5572
}
5673

74+
namespace test {
75+
76+
rmm::cuda_stream_view const get_default_stream()
77+
{
5778
#ifdef STREAM_MODE_TESTING
58-
} // namespace test
79+
static rmm::cuda_stream stream{};
80+
return stream;
81+
#else
82+
return cudf::get_default_stream();
5983
#endif
84+
}
85+
86+
} // namespace test
6087

6188
#ifdef STREAM_MODE_TESTING
6289
namespace detail {
@@ -119,15 +146,6 @@ void check_stream_and_error(cudaStream_t stream)
119146
}
120147
}
121148

122-
/**
123-
* @brief Container for CUDA APIs that have been overloaded using DEFINE_OVERLOAD.
124-
*
125-
* This variable must be initialized before everything else.
126-
*
127-
* @see find_originals for a description of the priorities
128-
*/
129-
__attribute__((init_priority(1001))) std::unordered_map<std::string, void*> originals;
130-
131149
/**
132150
* @brief Macro for generating functions to override existing CUDA functions.
133151
*
@@ -145,15 +163,12 @@ __attribute__((init_priority(1001))) std::unordered_map<std::string, void*> orig
145163
* @param signature The function signature (must include names, not just types).
146164
* @param arguments The function arguments (names only, no types).
147165
*/
148-
#define DEFINE_OVERLOAD(function, signature, arguments) \
149-
using function##_t = cudaError_t (*)(signature); \
150-
\
151-
cudaError_t function(signature) \
152-
{ \
153-
check_stream_and_error(stream); \
154-
return ((function##_t)originals[#function])(arguments); \
155-
} \
156-
__attribute__((constructor(1002))) void queue_##function() { originals[#function] = nullptr; }
166+
#define DEFINE_OVERLOAD(function, signature, arguments) \
167+
extern "C" cudaError_t cudf_##function(signature) \
168+
{ \
169+
check_stream_and_error(stream); \
170+
return function(arguments); \
171+
}
157172

158173
/**
159174
* @brief Helper macro to define macro arguments that contain a comma.
@@ -177,6 +192,8 @@ __attribute__((init_priority(1001))) std::unordered_map<std::string, void*> orig
177192
- https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__INTEROP.html#group__CUDART__INTEROP
178193
- https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__GRAPH.html#group__CUDART__GRAPH
179194
- https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__HIGHLEVEL.html#group__CUDART__HIGHLEVEL
195+
196+
This list must be kept in sync with cpp/cmake/fixup_target_for_stream_testing.py
180197
*/
181198
// clang-format on
182199

@@ -335,18 +352,3 @@ DEFINE_OVERLOAD(cudaMallocAsync,
335352
DEFINE_OVERLOAD(cudaMallocFromPoolAsync,
336353
ARG(void** ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream),
337354
ARG(ptr, size, memPool, stream));
338-
339-
/**
340-
* @brief Function to collect all the original CUDA symbols corresponding to overloaded functions.
341-
*
342-
* Note on priorities:
343-
* - `originals` must be initialized first, so it is 1001.
344-
* - The function names must be added to originals next in the macro, so those are 1002.
345-
* - Finally, this function actually finds the original symbols so it is 1003.
346-
*/
347-
__attribute__((constructor(1003))) void find_originals()
348-
{
349-
for (auto it : originals) {
350-
originals[it.first] = dlsym(RTLD_NEXT, it.first.data());
351-
}
352-
}

dependencies.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ files:
154154
- build_base
155155
- cuda
156156
- cuda_version
157+
- stream_test_dependencies
157158
docs:
158159
output: none
159160
includes:
@@ -1343,3 +1344,9 @@ dependencies:
13431344
- output_types: [conda, requirements, pyproject]
13441345
packages:
13451346
- structlog
1347+
stream_test_dependencies:
1348+
common:
1349+
- output_types: conda
1350+
packages:
1351+
- python
1352+
- py-lief

0 commit comments

Comments
 (0)