Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions plugin/sycl/common/hist_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ ::sycl::event BuildHistDispatchKernel(
GHistRow<FPType, MemoryType::on_device>* hist,
bool isDense,
GHistRow<FPType, MemoryType::on_device>* hist_buffer,
const tree::DeviceProperties& device_prop,
const DeviceProperties& device_prop,
::sycl::event events_priv,
bool force_atomic_use) {
const size_t size = row_indices.Size();
Expand Down Expand Up @@ -373,7 +373,7 @@ ::sycl::event BuildHistKernel(::sycl::queue* qu,
const GHistIndexMatrix& gmat, const bool isDense,
GHistRow<FPType, MemoryType::on_device>* hist,
GHistRow<FPType, MemoryType::on_device>* hist_buffer,
const tree::DeviceProperties& device_prop,
const DeviceProperties& device_prop,
::sycl::event event_priv,
bool force_atomic_use) {
const bool is_dense = isDense;
Expand Down Expand Up @@ -409,7 +409,7 @@ ::sycl::event GHistBuilder<GradientSumT>::BuildHist(
GHistRowT<MemoryType::on_device>* hist,
bool isDense,
GHistRowT<MemoryType::on_device>* hist_buffer,
const tree::DeviceProperties& device_prop,
const DeviceProperties& device_prop,
::sycl::event event_priv,
bool force_atomic_use) {
return BuildHistKernel<GradientSumT>(qu_, gpair, row_indices, gmat,
Expand All @@ -426,7 +426,7 @@ ::sycl::event GHistBuilder<float>::BuildHist(
GHistRow<float, MemoryType::on_device>* hist,
bool isDense,
GHistRow<float, MemoryType::on_device>* hist_buffer,
const tree::DeviceProperties& device_prop,
const DeviceProperties& device_prop,
::sycl::event event_priv,
bool force_atomic_use);
template
Expand All @@ -437,7 +437,7 @@ ::sycl::event GHistBuilder<double>::BuildHist(
GHistRow<double, MemoryType::on_device>* hist,
bool isDense,
GHistRow<double, MemoryType::on_device>* hist_buffer,
const tree::DeviceProperties& device_prop,
const DeviceProperties& device_prop,
::sycl::event event_priv,
bool force_atomic_use);

Expand Down
2 changes: 1 addition & 1 deletion plugin/sycl/common/hist_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ class GHistBuilder {
GHistRowT<MemoryType::on_device>* HistCollection,
bool isDense,
GHistRowT<MemoryType::on_device>* hist_buffer,
const tree::DeviceProperties& device_prop,
const DeviceProperties& device_prop,
::sycl::event event,
bool force_atomic_use = false);

Expand Down
66 changes: 66 additions & 0 deletions plugin/sycl/device_properties.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*!
* Copyright 2017-2025 by Contributors
* \file device_properties.h
*/
#ifndef PLUGIN_SYCL_DEVICE_PROPERTIES_H_
#define PLUGIN_SYCL_DEVICE_PROPERTIES_H_

#include <sycl/sycl.hpp>
#include <sycl/ext/oneapi/experimental/device_architecture.hpp>
#include "../../src/common/common.h" // for HumanMemUnit

namespace xgboost {
namespace sycl {

class DeviceProperties {
void GetL2Size(const ::sycl::device& device) {
l2_size = device.get_info<::sycl::info::device::global_mem_cache_size>();
LOG(INFO) << "Detected L2 Size = " << ::xgboost::common::HumanMemUnit(l2_size);
l2_size_per_eu = static_cast<float>(l2_size) / max_compute_units;
}

void GetSRAMSize(const ::sycl::device& device) {
auto arch =
device.get_info<::sycl::ext::oneapi::experimental::info::device::architecture>();
size_t eu_per_core =
device.get_info<::sycl::ext::intel::info::device::gpu_eu_count_per_subslice>();
switch (arch) {
case ::sycl::ext::oneapi::experimental::architecture::intel_gpu_pvc: {
LOG(INFO) << "Xe-HPC (Ponte Vecchio) Architecture. L1 friendly optimization enabled.";
size_t l1_size = 512 * 1024;
size_t registers_size = 64 * 1024;
sram_size_per_eu = l1_size / eu_per_core + registers_size;
break;
}
default:
sram_size_per_eu = 0;
}
}

public:
bool is_gpu;
bool usm_host_allocations;
size_t max_compute_units;
size_t max_work_group_size;
size_t sub_group_size;
float sram_size_per_eu = 0;
size_t l2_size = 0;
float l2_size_per_eu = 0;

explicit DeviceProperties(const ::sycl::device& device):
is_gpu(device.is_gpu()),
usm_host_allocations(device.has(::sycl::aspect::usm_host_allocations)),
max_compute_units(device.get_info<::sycl::info::device::max_compute_units>()),
max_work_group_size(device.get_info<::sycl::info::device::max_work_group_size>()),
sub_group_size(device.get_info<::sycl::info::device::sub_group_sizes>().back()) {
GetL2Size(device);
if (is_gpu) {
GetSRAMSize(device);
}
}
};

} // namespace sycl
} // namespace xgboost

#endif // PLUGIN_SYCL_DEVICE_PROPERTIES_H_
Loading
Loading