Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions src/common/device_helpers.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -764,11 +764,6 @@ template <cudaMemcpyKind kind, typename T, typename U>
#endif // CUDART_VERSION >= 12080
}

inline auto CachingThrustPolicy() {
XGBCachingDeviceAllocator<char> alloc;
return thrust::cuda::par_nosync(alloc).on(::xgboost::curt::DefaultStream());
}

// Force nvcc to load data as constant
template <typename T>
class LDGIterator {
Expand Down
7 changes: 4 additions & 3 deletions src/tree/constraints.cu
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,9 @@ __global__ void InteractionConstraintSplitKernel(LBitField64 feature, int32_t fe
}
}

void FeatureInteractionConstraintDevice::Split(bst_node_t node_id, bst_feature_t feature_id,
bst_node_t left_id, bst_node_t right_id) {
void FeatureInteractionConstraintDevice::Split(Context const* ctx, bst_node_t node_id,
bst_feature_t feature_id, bst_node_t left_id,
bst_node_t right_id) {
if (!has_constraint_) {
return;
}
Expand Down Expand Up @@ -310,7 +311,7 @@ void FeatureInteractionConstraintDevice::Split(bst_node_t node_id, bst_feature_t
launch_split(InteractionConstraintSplitKernel, feature_buffer_, feature_id, node, left, right);

// clear the buffer after use
thrust::fill_n(dh::CachingThrustPolicy(), feature_buffer_.Data(), feature_buffer_.NumValues(), 0);
thrust::fill_n(ctx->CUDACtx()->CTP(), feature_buffer_.Data(), feature_buffer_.NumValues(), 0);
}

} // namespace xgboost
3 changes: 2 additions & 1 deletion src/tree/constraints.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ struct FeatureInteractionConstraintDevice {
common::Span<bst_feature_t const> Query(common::Span<bst_feature_t const> feature_list,
bst_node_t nidx);
/*! \brief Apply split for node_id. */
void Split(bst_node_t node_id, bst_feature_t feature_id, bst_node_t left_id, bst_node_t right_id);
void Split(Context const* ctx, bst_node_t node_id, bst_feature_t feature_id, bst_node_t left_id,
bst_node_t right_id);
};

} // namespace xgboost
Expand Down
2 changes: 1 addition & 1 deletion src/tree/updater_gpu_hist.cu
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,7 @@ struct GPUHistMakerDevice {
evaluator_.ApplyTreeSplit(candidate, p_tree);

const auto& parent = tree[candidate.nidx];
interaction_constraints.Split(candidate.nidx, parent.SplitIndex(), parent.LeftChild(),
interaction_constraints.Split(ctx_, candidate.nidx, parent.SplitIndex(), parent.LeftChild(),
parent.RightChild());
}

Expand Down
16 changes: 10 additions & 6 deletions tests/cpp/common/test_device_vector.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2024-2025, XGBoost Contributors
* Copyright 2024-2026, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <thrust/iterator/counting_iterator.h> // for make_counting_iterator
Expand All @@ -8,9 +8,11 @@
#include <numeric> // for iota
#include <thread> // for thread

#include "../../../src/common/cuda_context.cuh" // for CUDAContext
#include "../../../src/common/cuda_rt_utils.h" // for DrVersion
#include "../../../src/common/device_helpers.cuh" // for CachingThrustPolicy, PinnedMemory
#include "../../../src/common/device_helpers.cuh" // for PinnedMemory
#include "../../../src/common/device_vector.cuh"
#include "../helpers.h" // for MakeCUDACtx
#include "xgboost/global_config.h" // for GlobalConfigThreadLocalStore
#include "xgboost/windefs.h" // for xgboost_IS_WIN

Expand All @@ -33,6 +35,7 @@ TEST(AsyncPoolAllocator, Basic) {
#endif // !defined(XGBOOST_USE_RMM)

TEST(DeviceUVector, Basic) {
auto ctx = xgboost::MakeCUDACtx(0);
GlobalMemoryLogger().Clear();
std::int32_t verbosity{3};
std::swap(verbosity, xgboost::GlobalConfigThreadLocalStore::Get()->verbosity);
Expand All @@ -51,11 +54,11 @@ TEST(DeviceUVector, Basic) {
ASSERT_EQ(std::distance(uvec1.begin(), uvec1.end()), uvec1.size());
auto orig = uvec1.size();

thrust::sequence(dh::CachingThrustPolicy(), uvec1.begin(), uvec1.end(), 0);
thrust::sequence(ctx.CUDACtx()->CTP(), uvec1.begin(), uvec1.end(), 0);
uvec1.resize(32);
ASSERT_EQ(uvec1.size(), 32);
ASSERT_EQ(uvec1.Capacity(), 32);
auto eq = thrust::equal(dh::CachingThrustPolicy(), uvec1.cbegin(), uvec1.cbegin() + orig,
auto eq = thrust::equal(ctx.CUDACtx()->CTP(), uvec1.cbegin(), uvec1.cbegin() + orig,
thrust::make_counting_iterator(0));
ASSERT_TRUE(eq);

Expand All @@ -69,6 +72,7 @@ namespace {
class TestVirtualMem : public ::testing::TestWithParam<CUmemLocationType> {
public:
void Run() {
auto ctx = xgboost::MakeCUDACtx(0);
auto type = this->GetParam();
detail::GrowOnlyVirtualMemVec vec{type};
auto prop = xgboost::cudr::MakeAllocProp(type);
Expand All @@ -86,7 +90,7 @@ class TestVirtualMem : public ::testing::TestWithParam<CUmemLocationType> {
};
auto fill = [&](std::int32_t n_orig, xgboost::common::Span<std::int32_t> data) {
if (type == CU_MEM_LOCATION_TYPE_DEVICE) {
thrust::sequence(dh::CachingThrustPolicy(), data.data() + n_orig, data.data() + data.size(),
thrust::sequence(ctx.CUDACtx()->CTP(), data.data() + n_orig, data.data() + data.size(),
n_orig);
dh::safe_cuda(cudaMemcpy(h_data.data(), data.data(), data.size_bytes(), cudaMemcpyDefault));
} else {
Expand Down Expand Up @@ -151,7 +155,7 @@ TEST(TestVirtualMem, Version) {
PinnedMemory pinned;
#if defined(xgboost_IS_WIN)
ASSERT_FALSE(pinned.IsVm());
#else // defined(xgboost_IS_WIN)
#else // defined(xgboost_IS_WIN)
if (major == 12 && minor >= 5 || major > 12) {
ASSERT_TRUE(pinned.IsVm());
} else {
Expand Down
94 changes: 42 additions & 52 deletions tests/cpp/tree/test_constraints.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,11 @@ struct FConstraintWrapper : public FeatureInteractionConstraintDevice {
common::Span<LBitField64> GetNodeConstraints() {
return FeatureInteractionConstraintDevice::s_node_constraints_;
}
FConstraintWrapper(tree::TrainParam param, bst_feature_t n_features) :
FeatureInteractionConstraintDevice(param, n_features) {}
FConstraintWrapper(tree::TrainParam param, bst_feature_t n_features)
: FeatureInteractionConstraintDevice(param, n_features) {}

dh::device_vector<bst_feature_t> const& GetDSets() const {
return d_sets_;
}
dh::device_vector<size_t> const& GetDSetsPtr() const {
return d_sets_ptr_;
}
dh::device_vector<bst_feature_t> const& GetDSets() const { return d_sets_; }
dh::device_vector<size_t> const& GetDSetsPtr() const { return d_sets_ptr_; }
};

std::string GetConstraintsStr() {
Expand All @@ -46,12 +42,11 @@ tree::TrainParam GetParameter() {

void CompareBitField(LBitField64 d_field, std::set<uint32_t> positions) {
std::vector<LBitField64::value_type> h_field_storage(d_field.Bits().size());
thrust::copy(thrust::device_ptr<LBitField64::value_type>(d_field.Bits().data()),
thrust::device_ptr<LBitField64::value_type>(
d_field.Bits().data() + d_field.Bits().size()),
h_field_storage.data());
LBitField64 h_field{ {h_field_storage.data(),
h_field_storage.data() + h_field_storage.size()} };
thrust::copy(
thrust::device_ptr<LBitField64::value_type>(d_field.Bits().data()),
thrust::device_ptr<LBitField64::value_type>(d_field.Bits().data() + d_field.Bits().size()),
h_field_storage.data());
LBitField64 h_field{{h_field_storage.data(), h_field_storage.data() + h_field_storage.size()}};

for (size_t i = 0; i < h_field.Capacity(); ++i) {
if (positions.find(i) != positions.cend()) {
Expand All @@ -64,7 +59,6 @@ void CompareBitField(LBitField64 d_field, std::set<uint32_t> positions) {

} // anonymous namespace


TEST(GPUFeatureInteractionConstraint, Init) {
{
int32_t constexpr kFeatures = 6;
Expand All @@ -75,12 +69,10 @@ TEST(GPUFeatureInteractionConstraint, Init) {
for (LBitField64 const& d_node : s_nodes_constraints) {
std::vector<LBitField64::value_type> h_node_storage(d_node.Bits().size());
thrust::copy(thrust::device_ptr<LBitField64::value_type const>(d_node.Bits().data()),
thrust::device_ptr<LBitField64::value_type const>(
d_node.Bits().data() + d_node.Bits().size()),
thrust::device_ptr<LBitField64::value_type const>(d_node.Bits().data() +
d_node.Bits().size()),
h_node_storage.data());
LBitField64 h_node {
{h_node_storage.data(), h_node_storage.data() + h_node_storage.size()}
};
LBitField64 h_node{{h_node_storage.data(), h_node_storage.data() + h_node_storage.size()}};
// no feature is attached to node.
for (size_t i = 0; i < h_node.Capacity(); ++i) {
ASSERT_FALSE(h_node.Check(i));
Expand All @@ -94,8 +86,8 @@ TEST(GPUFeatureInteractionConstraint, Init) {
tree::TrainParam param = GetParameter();
param.interaction_constraints = R"([[0, 1, 3], [3, 5, 6]])";
FConstraintWrapper constraints(param, kFeatures);
std::vector<bst_feature_t> h_sets {0, 0, 0, 1, 1, 1};
std::vector<size_t> h_sets_ptr {0, 1, 2, 2, 4, 4, 5, 6};
std::vector<bst_feature_t> h_sets{0, 0, 0, 1, 1, 1};
std::vector<size_t> h_sets_ptr{0, 1, 2, 2, 4, 4, 5, 6};
auto d_sets = constraints.GetDSets();
ASSERT_EQ(h_sets.size(), d_sets.size());
auto d_sets_ptr = constraints.GetDSetsPtr();
Expand All @@ -120,18 +112,19 @@ TEST(GPUFeatureInteractionConstraint, Init) {
auto _128_end = d_sets_ptr[128 + 1];
ASSERT_EQ(_128_end - _128_beg, 2);
ASSERT_EQ(d_sets[_128_beg], 1);
ASSERT_EQ(d_sets[_128_end-1], 2);
ASSERT_EQ(d_sets[_128_end - 1], 2);
}
}

TEST(GPUFeatureInteractionConstraint, Split) {
auto ctx = MakeCUDACtx(0);
tree::TrainParam param = GetParameter();
int32_t constexpr kFeatures = 6;
FConstraintWrapper constraints(param, kFeatures);

{
LBitField64 d_node[3];
constraints.Split(0, /*feature_id=*/1, 1, 2);
constraints.Split(&ctx, 0, /*feature_id=*/1, 1, 2);
for (size_t nid = 0; nid < 3; ++nid) {
d_node[nid] = constraints.GetNodeConstraints()[nid];
ASSERT_EQ(d_node[nid].Bits().size(), 1);
Expand All @@ -141,7 +134,7 @@ TEST(GPUFeatureInteractionConstraint, Split) {

{
LBitField64 d_node[5];
constraints.Split(1, /*feature_id=*/0, /*left_id=*/3, /*right_id=*/4);
constraints.Split(&ctx, 1, /*feature_id=*/0, /*left_id=*/3, /*right_id=*/4);
for (auto nid : {1, 3, 4}) {
d_node[nid] = constraints.GetNodeConstraints()[nid];
CompareBitField(d_node[nid], {0, 1, 2});
Expand All @@ -165,24 +158,22 @@ TEST(GPUFeatureInteractionConstraint, QueryNode) {
}

{
constraints.Split(/*node_id=*/ 0, /*feature_id=*/ 1, 1, 2);
constraints.Split(&ctx, /*node_id=*/0, /*feature_id=*/1, 1, 2);
auto span = constraints.QueryNode(&ctx, 0);
std::vector<bst_feature_t> h_result (span.size());
std::vector<bst_feature_t> h_result(span.size());
thrust::copy(thrust::device_ptr<bst_feature_t>(span.data()),
thrust::device_ptr<bst_feature_t>(span.data() + span.size()),
h_result.begin());
thrust::device_ptr<bst_feature_t>(span.data() + span.size()), h_result.begin());
ASSERT_EQ(h_result.size(), 2);
ASSERT_EQ(h_result[0], 1);
ASSERT_EQ(h_result[1], 2);
}

{
constraints.Split(1, /*feature_id=*/0, 3, 4);
constraints.Split(&ctx, 1, /*feature_id=*/0, 3, 4);
auto span = constraints.QueryNode(&ctx, 1);
std::vector<bst_feature_t> h_result (span.size());
std::vector<bst_feature_t> h_result(span.size());
thrust::copy(thrust::device_ptr<bst_feature_t>(span.data()),
thrust::device_ptr<bst_feature_t>(span.data() + span.size()),
h_result.begin());
thrust::device_ptr<bst_feature_t>(span.data() + span.size()), h_result.begin());
ASSERT_EQ(h_result.size(), 3);
ASSERT_EQ(h_result[0], 0);
ASSERT_EQ(h_result[1], 1);
Expand All @@ -192,8 +183,7 @@ TEST(GPUFeatureInteractionConstraint, QueryNode) {
span = constraints.QueryNode(&ctx, 3);
h_result.resize(span.size());
thrust::copy(thrust::device_ptr<bst_feature_t>(span.data()),
thrust::device_ptr<bst_feature_t>(span.data() + span.size()),
h_result.begin());
thrust::device_ptr<bst_feature_t>(span.data() + span.size()), h_result.begin());
ASSERT_EQ(h_result.size(), 3);
ASSERT_EQ(h_result[0], 0);
ASSERT_EQ(h_result[1], 1);
Expand All @@ -204,12 +194,11 @@ TEST(GPUFeatureInteractionConstraint, QueryNode) {
tree::TrainParam large_param = GetParameter();
large_param.interaction_constraints = R"([[1, 139], [244, 0], [139, 221]])";
FConstraintWrapper large_features(large_param, 256);
large_features.Split(0, 139, 1, 2);
large_features.Split(&ctx, 0, 139, 1, 2);
auto span = large_features.QueryNode(&ctx, 0);
std::vector<bst_feature_t> h_result (span.size());
std::vector<bst_feature_t> h_result(span.size());
thrust::copy(thrust::device_ptr<bst_feature_t>(span.data()),
thrust::device_ptr<bst_feature_t>(span.data() + span.size()),
h_result.begin());
thrust::device_ptr<bst_feature_t>(span.data() + span.size()), h_result.begin());
ASSERT_EQ(h_result.size(), 3);
ASSERT_EQ(h_result[0], 1);
ASSERT_EQ(h_result[1], 139);
Expand All @@ -230,12 +219,13 @@ void CompareFeatureList(common::Span<bst_feature_t const> s_output,
} // anonymous namespace

TEST(GPUFeatureInteractionConstraint, Query) {
auto ctx = MakeCUDACtx(0);
{
tree::TrainParam param = GetParameter();
bst_feature_t constexpr kFeatures = 6;
FConstraintWrapper constraints(param, kFeatures);
std::vector<bst_feature_t> h_input_feature_list {0, 1, 2, 3, 4, 5};
dh::device_vector<bst_feature_t> d_input_feature_list (h_input_feature_list);
std::vector<bst_feature_t> h_input_feature_list{0, 1, 2, 3, 4, 5};
dh::device_vector<bst_feature_t> d_input_feature_list(h_input_feature_list);
common::Span<bst_feature_t> s_input_feature_list = dh::ToSpan(d_input_feature_list);

auto s_output = constraints.Query(s_input_feature_list, 0);
Expand All @@ -245,9 +235,9 @@ TEST(GPUFeatureInteractionConstraint, Query) {
tree::TrainParam param = GetParameter();
bst_feature_t constexpr kFeatures = 6;
FConstraintWrapper constraints(param, kFeatures);
constraints.Split(/*node_id=*/0, /*feature_id=*/1, /*left_id=*/1, /*right_id=*/2);
constraints.Split(/*node_id=*/1, /*feature_id=*/0, /*left_id=*/3, /*right_id=*/4);
constraints.Split(/*node_id=*/4, /*feature_id=*/3, /*left_id=*/5, /*right_id=*/6);
constraints.Split(&ctx, /*node_id=*/0, /*feature_id=*/1, /*left_id=*/1, /*right_id=*/2);
constraints.Split(&ctx, /*node_id=*/1, /*feature_id=*/0, /*left_id=*/3, /*right_id=*/4);
constraints.Split(&ctx, /*node_id=*/4, /*feature_id=*/3, /*left_id=*/5, /*right_id=*/6);
/*
* (node id) [allowed features]
*
Expand All @@ -263,8 +253,8 @@ TEST(GPUFeatureInteractionConstraint, Query) {
*
*/

std::vector<bst_feature_t> h_input_feature_list {0, 1, 2, 3, 4, 5};
dh::device_vector<bst_feature_t> d_input_feature_list (h_input_feature_list);
std::vector<bst_feature_t> h_input_feature_list{0, 1, 2, 3, 4, 5};
dh::device_vector<bst_feature_t> d_input_feature_list(h_input_feature_list);
common::Span<bst_feature_t> s_input_feature_list = dh::ToSpan(d_input_feature_list);

auto s_output = constraints.Query(s_input_feature_list, 1);
Expand All @@ -289,10 +279,10 @@ TEST(GPUFeatureInteractionConstraint, Query) {
param.interaction_constraints = constraints_str;

FConstraintWrapper constraints(param, kFeatures);
constraints.Split(/*node_id=*/0, /*feature_id=*/2, /*left_id=*/1, /*right_id=*/2);
constraints.Split(&ctx, /*node_id=*/0, /*feature_id=*/2, /*left_id=*/1, /*right_id=*/2);

std::vector<bst_feature_t> h_input_feature_list {0, 1, 2, 3, 4, 5};
dh::device_vector<bst_feature_t> d_input_feature_list (h_input_feature_list);
std::vector<bst_feature_t> h_input_feature_list{0, 1, 2, 3, 4, 5};
dh::device_vector<bst_feature_t> d_input_feature_list(h_input_feature_list);
common::Span<bst_feature_t> s_input_feature_list = dh::ToSpan(d_input_feature_list);

auto s_output = constraints.Query(s_input_feature_list, 1);
Expand All @@ -306,10 +296,10 @@ TEST(GPUFeatureInteractionConstraint, Query) {
std::string const constraints_str = R"constraint([[0, 1]])constraint";
param.interaction_constraints = constraints_str;
FConstraintWrapper constraints(param, kFeatures);
std::vector<bst_feature_t> h_input_feature_list {0, 1, 2, 3, 4, 5};
dh::device_vector<bst_feature_t> d_input_feature_list (h_input_feature_list);
std::vector<bst_feature_t> h_input_feature_list{0, 1, 2, 3, 4, 5};
dh::device_vector<bst_feature_t> d_input_feature_list(h_input_feature_list);
common::Span<bst_feature_t> s_input_feature_list = dh::ToSpan(d_input_feature_list);
constraints.Split(/*node_id=*/0, /*feature_id=*/2, /*left_id=*/1, /*right_id=*/2);
constraints.Split(&ctx, /*node_id=*/0, /*feature_id=*/2, /*left_id=*/1, /*right_id=*/2);
auto s_output = constraints.Query(s_input_feature_list, 1);
CompareFeatureList(s_output, {2});
s_output = constraints.Query(s_input_feature_list, 2);
Expand Down
Loading