diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh index 8e5b1d77e9cc..eebf37a2becd 100644 --- a/src/common/device_helpers.cuh +++ b/src/common/device_helpers.cuh @@ -764,11 +764,6 @@ template #endif // CUDART_VERSION >= 12080 } -inline auto CachingThrustPolicy() { - XGBCachingDeviceAllocator alloc; - return thrust::cuda::par_nosync(alloc).on(::xgboost::curt::DefaultStream()); -} - // Force nvcc to load data as constant template class LDGIterator { diff --git a/src/tree/constraints.cu b/src/tree/constraints.cu index ff6398db9190..d6941a7d5a2b 100644 --- a/src/tree/constraints.cu +++ b/src/tree/constraints.cu @@ -281,8 +281,9 @@ __global__ void InteractionConstraintSplitKernel(LBitField64 feature, int32_t fe } } -void FeatureInteractionConstraintDevice::Split(bst_node_t node_id, bst_feature_t feature_id, - bst_node_t left_id, bst_node_t right_id) { +void FeatureInteractionConstraintDevice::Split(Context const* ctx, bst_node_t node_id, + bst_feature_t feature_id, bst_node_t left_id, + bst_node_t right_id) { if (!has_constraint_) { return; } @@ -310,7 +311,7 @@ void FeatureInteractionConstraintDevice::Split(bst_node_t node_id, bst_feature_t launch_split(InteractionConstraintSplitKernel, feature_buffer_, feature_id, node, left, right); // clear the buffer after use - thrust::fill_n(dh::CachingThrustPolicy(), feature_buffer_.Data(), feature_buffer_.NumValues(), 0); + thrust::fill_n(ctx->CUDACtx()->CTP(), feature_buffer_.Data(), feature_buffer_.NumValues(), 0); } } // namespace xgboost diff --git a/src/tree/constraints.cuh b/src/tree/constraints.cuh index 257eae2a88f8..4fa2feeb12f1 100644 --- a/src/tree/constraints.cuh +++ b/src/tree/constraints.cuh @@ -92,7 +92,8 @@ struct FeatureInteractionConstraintDevice { common::Span Query(common::Span feature_list, bst_node_t nidx); /*! \brief Apply split for node_id. */ - void Split(bst_node_t node_id, bst_feature_t feature_id, bst_node_t left_id, bst_node_t right_id); + void Split(Context const* ctx, bst_node_t node_id, bst_feature_t feature_id, bst_node_t left_id, + bst_node_t right_id); }; } // namespace xgboost diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index 5e04212aaece..d54173182cbf 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -650,7 +650,7 @@ struct GPUHistMakerDevice { evaluator_.ApplyTreeSplit(candidate, p_tree); const auto& parent = tree[candidate.nidx]; - interaction_constraints.Split(candidate.nidx, parent.SplitIndex(), parent.LeftChild(), + interaction_constraints.Split(ctx_, candidate.nidx, parent.SplitIndex(), parent.LeftChild(), parent.RightChild()); } diff --git a/tests/cpp/common/test_device_vector.cu b/tests/cpp/common/test_device_vector.cu index 16a847648eb7..d424d4bd1daa 100644 --- a/tests/cpp/common/test_device_vector.cu +++ b/tests/cpp/common/test_device_vector.cu @@ -1,5 +1,5 @@ /** - * Copyright 2024-2025, XGBoost Contributors + * Copyright 2024-2026, XGBoost Contributors */ #include #include // for make_counting_iterator @@ -8,9 +8,11 @@ #include // for iota #include // for thread +#include "../../../src/common/cuda_context.cuh" // for CUDAContext #include "../../../src/common/cuda_rt_utils.h" // for DrVersion -#include "../../../src/common/device_helpers.cuh" // for CachingThrustPolicy, PinnedMemory +#include "../../../src/common/device_helpers.cuh" // for PinnedMemory #include "../../../src/common/device_vector.cuh" +#include "../helpers.h" // for MakeCUDACtx #include "xgboost/global_config.h" // for GlobalConfigThreadLocalStore #include "xgboost/windefs.h" // for xgboost_IS_WIN @@ -33,6 +35,7 @@ TEST(AsyncPoolAllocator, Basic) { #endif // !defined(XGBOOST_USE_RMM) TEST(DeviceUVector, Basic) { + auto ctx = xgboost::MakeCUDACtx(0); GlobalMemoryLogger().Clear(); std::int32_t verbosity{3}; std::swap(verbosity, xgboost::GlobalConfigThreadLocalStore::Get()->verbosity); @@ -51,11 +54,11 @@ TEST(DeviceUVector, Basic) { ASSERT_EQ(std::distance(uvec1.begin(), uvec1.end()), uvec1.size()); auto orig = uvec1.size(); - thrust::sequence(dh::CachingThrustPolicy(), uvec1.begin(), uvec1.end(), 0); + thrust::sequence(ctx.CUDACtx()->CTP(), uvec1.begin(), uvec1.end(), 0); uvec1.resize(32); ASSERT_EQ(uvec1.size(), 32); ASSERT_EQ(uvec1.Capacity(), 32); - auto eq = thrust::equal(dh::CachingThrustPolicy(), uvec1.cbegin(), uvec1.cbegin() + orig, + auto eq = thrust::equal(ctx.CUDACtx()->CTP(), uvec1.cbegin(), uvec1.cbegin() + orig, thrust::make_counting_iterator(0)); ASSERT_TRUE(eq); @@ -69,6 +72,7 @@ namespace { class TestVirtualMem : public ::testing::TestWithParam { public: void Run() { + auto ctx = xgboost::MakeCUDACtx(0); auto type = this->GetParam(); detail::GrowOnlyVirtualMemVec vec{type}; auto prop = xgboost::cudr::MakeAllocProp(type); @@ -86,7 +90,7 @@ class TestVirtualMem : public ::testing::TestWithParam { }; auto fill = [&](std::int32_t n_orig, xgboost::common::Span data) { if (type == CU_MEM_LOCATION_TYPE_DEVICE) { - thrust::sequence(dh::CachingThrustPolicy(), data.data() + n_orig, data.data() + data.size(), + thrust::sequence(ctx.CUDACtx()->CTP(), data.data() + n_orig, data.data() + data.size(), n_orig); dh::safe_cuda(cudaMemcpy(h_data.data(), data.data(), data.size_bytes(), cudaMemcpyDefault)); } else { @@ -151,7 +155,7 @@ TEST(TestVirtualMem, Version) { PinnedMemory pinned; #if defined(xgboost_IS_WIN) ASSERT_FALSE(pinned.IsVm()); -#else // defined(xgboost_IS_WIN) +#else // defined(xgboost_IS_WIN) if (major == 12 && minor >= 5 || major > 12) { ASSERT_TRUE(pinned.IsVm()); } else { diff --git a/tests/cpp/tree/test_constraints.cu b/tests/cpp/tree/test_constraints.cu index 0d19e2325c95..7ae84f10b8b8 100644 --- a/tests/cpp/tree/test_constraints.cu +++ b/tests/cpp/tree/test_constraints.cu @@ -21,15 +21,11 @@ struct FConstraintWrapper : public FeatureInteractionConstraintDevice { common::Span GetNodeConstraints() { return FeatureInteractionConstraintDevice::s_node_constraints_; } - FConstraintWrapper(tree::TrainParam param, bst_feature_t n_features) : - FeatureInteractionConstraintDevice(param, n_features) {} + FConstraintWrapper(tree::TrainParam param, bst_feature_t n_features) + : FeatureInteractionConstraintDevice(param, n_features) {} - dh::device_vector const& GetDSets() const { - return d_sets_; - } - dh::device_vector const& GetDSetsPtr() const { - return d_sets_ptr_; - } + dh::device_vector const& GetDSets() const { return d_sets_; } + dh::device_vector const& GetDSetsPtr() const { return d_sets_ptr_; } }; std::string GetConstraintsStr() { @@ -46,12 +42,11 @@ tree::TrainParam GetParameter() { void CompareBitField(LBitField64 d_field, std::set positions) { std::vector h_field_storage(d_field.Bits().size()); - thrust::copy(thrust::device_ptr(d_field.Bits().data()), - thrust::device_ptr( - d_field.Bits().data() + d_field.Bits().size()), - h_field_storage.data()); - LBitField64 h_field{ {h_field_storage.data(), - h_field_storage.data() + h_field_storage.size()} }; + thrust::copy( + thrust::device_ptr(d_field.Bits().data()), + thrust::device_ptr(d_field.Bits().data() + d_field.Bits().size()), + h_field_storage.data()); + LBitField64 h_field{{h_field_storage.data(), h_field_storage.data() + h_field_storage.size()}}; for (size_t i = 0; i < h_field.Capacity(); ++i) { if (positions.find(i) != positions.cend()) { @@ -64,7 +59,6 @@ void CompareBitField(LBitField64 d_field, std::set positions) { } // anonymous namespace - TEST(GPUFeatureInteractionConstraint, Init) { { int32_t constexpr kFeatures = 6; @@ -75,12 +69,10 @@ TEST(GPUFeatureInteractionConstraint, Init) { for (LBitField64 const& d_node : s_nodes_constraints) { std::vector h_node_storage(d_node.Bits().size()); thrust::copy(thrust::device_ptr(d_node.Bits().data()), - thrust::device_ptr( - d_node.Bits().data() + d_node.Bits().size()), + thrust::device_ptr(d_node.Bits().data() + + d_node.Bits().size()), h_node_storage.data()); - LBitField64 h_node { - {h_node_storage.data(), h_node_storage.data() + h_node_storage.size()} - }; + LBitField64 h_node{{h_node_storage.data(), h_node_storage.data() + h_node_storage.size()}}; // no feature is attached to node. for (size_t i = 0; i < h_node.Capacity(); ++i) { ASSERT_FALSE(h_node.Check(i)); @@ -94,8 +86,8 @@ TEST(GPUFeatureInteractionConstraint, Init) { tree::TrainParam param = GetParameter(); param.interaction_constraints = R"([[0, 1, 3], [3, 5, 6]])"; FConstraintWrapper constraints(param, kFeatures); - std::vector h_sets {0, 0, 0, 1, 1, 1}; - std::vector h_sets_ptr {0, 1, 2, 2, 4, 4, 5, 6}; + std::vector h_sets{0, 0, 0, 1, 1, 1}; + std::vector h_sets_ptr{0, 1, 2, 2, 4, 4, 5, 6}; auto d_sets = constraints.GetDSets(); ASSERT_EQ(h_sets.size(), d_sets.size()); auto d_sets_ptr = constraints.GetDSetsPtr(); @@ -120,18 +112,19 @@ TEST(GPUFeatureInteractionConstraint, Init) { auto _128_end = d_sets_ptr[128 + 1]; ASSERT_EQ(_128_end - _128_beg, 2); ASSERT_EQ(d_sets[_128_beg], 1); - ASSERT_EQ(d_sets[_128_end-1], 2); + ASSERT_EQ(d_sets[_128_end - 1], 2); } } TEST(GPUFeatureInteractionConstraint, Split) { + auto ctx = MakeCUDACtx(0); tree::TrainParam param = GetParameter(); int32_t constexpr kFeatures = 6; FConstraintWrapper constraints(param, kFeatures); { LBitField64 d_node[3]; - constraints.Split(0, /*feature_id=*/1, 1, 2); + constraints.Split(&ctx, 0, /*feature_id=*/1, 1, 2); for (size_t nid = 0; nid < 3; ++nid) { d_node[nid] = constraints.GetNodeConstraints()[nid]; ASSERT_EQ(d_node[nid].Bits().size(), 1); @@ -141,7 +134,7 @@ TEST(GPUFeatureInteractionConstraint, Split) { { LBitField64 d_node[5]; - constraints.Split(1, /*feature_id=*/0, /*left_id=*/3, /*right_id=*/4); + constraints.Split(&ctx, 1, /*feature_id=*/0, /*left_id=*/3, /*right_id=*/4); for (auto nid : {1, 3, 4}) { d_node[nid] = constraints.GetNodeConstraints()[nid]; CompareBitField(d_node[nid], {0, 1, 2}); @@ -165,24 +158,22 @@ TEST(GPUFeatureInteractionConstraint, QueryNode) { } { - constraints.Split(/*node_id=*/ 0, /*feature_id=*/ 1, 1, 2); + constraints.Split(&ctx, /*node_id=*/0, /*feature_id=*/1, 1, 2); auto span = constraints.QueryNode(&ctx, 0); - std::vector h_result (span.size()); + std::vector h_result(span.size()); thrust::copy(thrust::device_ptr(span.data()), - thrust::device_ptr(span.data() + span.size()), - h_result.begin()); + thrust::device_ptr(span.data() + span.size()), h_result.begin()); ASSERT_EQ(h_result.size(), 2); ASSERT_EQ(h_result[0], 1); ASSERT_EQ(h_result[1], 2); } { - constraints.Split(1, /*feature_id=*/0, 3, 4); + constraints.Split(&ctx, 1, /*feature_id=*/0, 3, 4); auto span = constraints.QueryNode(&ctx, 1); - std::vector h_result (span.size()); + std::vector h_result(span.size()); thrust::copy(thrust::device_ptr(span.data()), - thrust::device_ptr(span.data() + span.size()), - h_result.begin()); + thrust::device_ptr(span.data() + span.size()), h_result.begin()); ASSERT_EQ(h_result.size(), 3); ASSERT_EQ(h_result[0], 0); ASSERT_EQ(h_result[1], 1); @@ -192,8 +183,7 @@ TEST(GPUFeatureInteractionConstraint, QueryNode) { span = constraints.QueryNode(&ctx, 3); h_result.resize(span.size()); thrust::copy(thrust::device_ptr(span.data()), - thrust::device_ptr(span.data() + span.size()), - h_result.begin()); + thrust::device_ptr(span.data() + span.size()), h_result.begin()); ASSERT_EQ(h_result.size(), 3); ASSERT_EQ(h_result[0], 0); ASSERT_EQ(h_result[1], 1); @@ -204,12 +194,11 @@ TEST(GPUFeatureInteractionConstraint, QueryNode) { tree::TrainParam large_param = GetParameter(); large_param.interaction_constraints = R"([[1, 139], [244, 0], [139, 221]])"; FConstraintWrapper large_features(large_param, 256); - large_features.Split(0, 139, 1, 2); + large_features.Split(&ctx, 0, 139, 1, 2); auto span = large_features.QueryNode(&ctx, 0); - std::vector h_result (span.size()); + std::vector h_result(span.size()); thrust::copy(thrust::device_ptr(span.data()), - thrust::device_ptr(span.data() + span.size()), - h_result.begin()); + thrust::device_ptr(span.data() + span.size()), h_result.begin()); ASSERT_EQ(h_result.size(), 3); ASSERT_EQ(h_result[0], 1); ASSERT_EQ(h_result[1], 139); @@ -230,12 +219,13 @@ void CompareFeatureList(common::Span s_output, } // anonymous namespace TEST(GPUFeatureInteractionConstraint, Query) { + auto ctx = MakeCUDACtx(0); { tree::TrainParam param = GetParameter(); bst_feature_t constexpr kFeatures = 6; FConstraintWrapper constraints(param, kFeatures); - std::vector h_input_feature_list {0, 1, 2, 3, 4, 5}; - dh::device_vector d_input_feature_list (h_input_feature_list); + std::vector h_input_feature_list{0, 1, 2, 3, 4, 5}; + dh::device_vector d_input_feature_list(h_input_feature_list); common::Span s_input_feature_list = dh::ToSpan(d_input_feature_list); auto s_output = constraints.Query(s_input_feature_list, 0); @@ -245,9 +235,9 @@ TEST(GPUFeatureInteractionConstraint, Query) { tree::TrainParam param = GetParameter(); bst_feature_t constexpr kFeatures = 6; FConstraintWrapper constraints(param, kFeatures); - constraints.Split(/*node_id=*/0, /*feature_id=*/1, /*left_id=*/1, /*right_id=*/2); - constraints.Split(/*node_id=*/1, /*feature_id=*/0, /*left_id=*/3, /*right_id=*/4); - constraints.Split(/*node_id=*/4, /*feature_id=*/3, /*left_id=*/5, /*right_id=*/6); + constraints.Split(&ctx, /*node_id=*/0, /*feature_id=*/1, /*left_id=*/1, /*right_id=*/2); + constraints.Split(&ctx, /*node_id=*/1, /*feature_id=*/0, /*left_id=*/3, /*right_id=*/4); + constraints.Split(&ctx, /*node_id=*/4, /*feature_id=*/3, /*left_id=*/5, /*right_id=*/6); /* * (node id) [allowed features] * @@ -263,8 +253,8 @@ TEST(GPUFeatureInteractionConstraint, Query) { * */ - std::vector h_input_feature_list {0, 1, 2, 3, 4, 5}; - dh::device_vector d_input_feature_list (h_input_feature_list); + std::vector h_input_feature_list{0, 1, 2, 3, 4, 5}; + dh::device_vector d_input_feature_list(h_input_feature_list); common::Span s_input_feature_list = dh::ToSpan(d_input_feature_list); auto s_output = constraints.Query(s_input_feature_list, 1); @@ -289,10 +279,10 @@ TEST(GPUFeatureInteractionConstraint, Query) { param.interaction_constraints = constraints_str; FConstraintWrapper constraints(param, kFeatures); - constraints.Split(/*node_id=*/0, /*feature_id=*/2, /*left_id=*/1, /*right_id=*/2); + constraints.Split(&ctx, /*node_id=*/0, /*feature_id=*/2, /*left_id=*/1, /*right_id=*/2); - std::vector h_input_feature_list {0, 1, 2, 3, 4, 5}; - dh::device_vector d_input_feature_list (h_input_feature_list); + std::vector h_input_feature_list{0, 1, 2, 3, 4, 5}; + dh::device_vector d_input_feature_list(h_input_feature_list); common::Span s_input_feature_list = dh::ToSpan(d_input_feature_list); auto s_output = constraints.Query(s_input_feature_list, 1); @@ -306,10 +296,10 @@ TEST(GPUFeatureInteractionConstraint, Query) { std::string const constraints_str = R"constraint([[0, 1]])constraint"; param.interaction_constraints = constraints_str; FConstraintWrapper constraints(param, kFeatures); - std::vector h_input_feature_list {0, 1, 2, 3, 4, 5}; - dh::device_vector d_input_feature_list (h_input_feature_list); + std::vector h_input_feature_list{0, 1, 2, 3, 4, 5}; + dh::device_vector d_input_feature_list(h_input_feature_list); common::Span s_input_feature_list = dh::ToSpan(d_input_feature_list); - constraints.Split(/*node_id=*/0, /*feature_id=*/2, /*left_id=*/1, /*right_id=*/2); + constraints.Split(&ctx, /*node_id=*/0, /*feature_id=*/2, /*left_id=*/1, /*right_id=*/2); auto s_output = constraints.Query(s_input_feature_list, 1); CompareFeatureList(s_output, {2}); s_output = constraints.Query(s_input_feature_list, 2);