Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion llvm/include/llvm/SYCLLowerIR/LowerWGLocalMemory.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ void initializeSYCLLowerWGLocalMemoryLegacyPass(PassRegistry &);
namespace sycl {
std::vector<std::pair<StringRef, int>>
getKernelNamesUsingImplicitLocalMem(const Module &M);
}

SmallVector<StringRef> getKernelNamesUsingWorkGroupDynamicMem(const Module &M);
} // namespace sycl

} // namespace llvm

Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Support/PropertySetIO.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ class PropertySetRegistry {
static constexpr char SYCL_HOST_PIPES[] = "SYCL/host pipes";
static constexpr char SYCL_VIRTUAL_FUNCTIONS[] = "SYCL/virtual functions";
static constexpr char SYCL_IMPLICIT_LOCAL_ARG[] = "SYCL/implicit local arg";
static constexpr char SYCL_WORK_GROUP_DYNAMIC_LOCAL_MEM[] =
Comment thread
YuriPlyakhin marked this conversation as resolved.
"SYCL/work group dynamic local mem";
static constexpr char SYCL_REGISTERED_KERNELS[] = "SYCL/registered kernels";

static constexpr char PROPERTY_REQD_WORK_GROUP_SIZE[] =
Expand Down
20 changes: 19 additions & 1 deletion llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ static constexpr char DYNAMIC_LOCALMEM_GV[] =
"__sycl_dynamicLocalMemoryPlaceholder_GV";
static constexpr char WORK_GROUP_STATIC_ATTR[] = "sycl-work-group-static";
static constexpr char WORK_GROUP_STATIC_ARG_ATTR[] = "sycl-implicit-local-arg";
static constexpr char WORK_GROUP_SCRATCH_ATTR[] = "sycl-work-group-scratch";
Comment thread
lbushi25 marked this conversation as resolved.

namespace {
class SYCLLowerWGLocalMemoryLegacy : public ModulePass {
Expand Down Expand Up @@ -78,6 +79,18 @@ sycl::getKernelNamesUsingImplicitLocalMem(const Module &M) {
return SPIRKernelNames;
}

SmallVector<StringRef>
sycl::getKernelNamesUsingWorkGroupDynamicMem(const Module &M) {
SmallVector<StringRef> SPIRKernelNames;
llvm::for_each(M.functions(), [&](const Function &F) {
if (F.getCallingConv() == CallingConv::SPIR_KERNEL &&
F.hasFnAttribute(WORK_GROUP_SCRATCH_ATTR)) {
SPIRKernelNames.emplace_back(F.getName());
}
});
return SPIRKernelNames;
}

char SYCLLowerWGLocalMemoryLegacy::ID = 0;
INITIALIZE_PASS(SYCLLowerWGLocalMemoryLegacy, "sycllowerwglocalmemory",
"Replace __sycl_allocateLocalMemory with allocation of memory "
Expand Down Expand Up @@ -143,7 +156,7 @@ lowerDynamicLocalMemCallDirect(CallInst *CI, Triple TT,

static void lowerLocalMemCall(Function *LocalMemAllocFunc,
std::function<void(CallInst *CI)> TransformCall) {
static SmallPtrSet<Function *, 16> FuncsCache;
SmallPtrSet<Function *, 16> FuncsCache;
SmallVector<CallInst *, 4> DelCalls;
for (User *U : LocalMemAllocFunc->users()) {
auto *CI = cast<CallInst>(U);
Expand All @@ -169,6 +182,11 @@ static void lowerLocalMemCall(Function *LocalMemAllocFunc,
!F->hasFnAttribute(WORK_GROUP_STATIC_ATTR))
F->addFnAttr(WORK_GROUP_STATIC_ATTR);

if (F->getCallingConv() == CallingConv::SPIR_KERNEL &&
LocalMemAllocFunc->getName() == SYCL_DYNAMIC_LOCALMEM_CALL &&
!F->hasFnAttribute(WORK_GROUP_SCRATCH_ATTR))
F->addFnAttr(WORK_GROUP_SCRATCH_ATTR);

for (auto *FU : F->users()) {
if (auto *UCI = dyn_cast<CallInst>(FU)) {
if (FuncsCache.insert(UCI->getFunction()).second)
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,12 @@ PropSetRegTy computeModuleProperties(const Module &M,
FuncAndArgPos.second);
}

{
SmallVector<StringRef> Kernels = getKernelNamesUsingWorkGroupDynamicMem(M);
for (const auto &Kernel : Kernels)
PropSet.add(PropSetRegTy::SYCL_WORK_GROUP_DYNAMIC_LOCAL_MEM, Kernel, 1);
}

{
if (isModuleUsingAsan(M))
PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "sanUsed", "asan");
Expand Down
27 changes: 23 additions & 4 deletions llvm/test/SYCLLowerIR/work_group_static.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ target triple = "spir64-unknown-unknown"
; CHECK: @__sycl_dynamicLocalMemoryPlaceholder_GV = linkonce_odr local_unnamed_addr addrspace(3) global ptr addrspace(3) poison

; Function Attrs: convergent norecurse
; CHECK: @_ZTS7KernelA(ptr addrspace(1) %0, ptr addrspace(3) noalias "sycl-implicit-local-arg" %[[IMPLICT_ARG:[a-zA-Z0-9]+]]{{.*}} !kernel_arg_addr_space ![[ADDR_SPACE_MD:[0-9]+]]
; CHECK: @_ZTS7KernelA(ptr addrspace(1) %0, ptr addrspace(3) noalias "sycl-implicit-local-arg" %[[IMPLICT_ARG:[a-zA-Z0-9]+]]{{.*}} #[[FULL_DYNAMIC_MEM_ATTRS:[0-9]+]] !kernel_arg_addr_space ![[ADDR_SPACE_MD:[0-9]+]]
define weak_odr dso_local spir_kernel void @_ZTS7KernelA(ptr addrspace(1) %0) local_unnamed_addr #0 !kernel_arg_addr_space !5 {
entry:
; CHECK: store ptr addrspace(3) %[[IMPLICT_ARG]], ptr addrspace(3) @__sycl_dynamicLocalMemoryPlaceholder_GV
Expand All @@ -22,28 +22,47 @@ entry:
}

; Function Attrs: convergent norecurse
; CHECK: @__sycl_kernel_B{{.*}} #[[ATTRS:[0-9]+]]
; CHECK: @__sycl_kernel_B{{.*}} #[[DYNAMIC_MEM_ATTRS:[0-9]+]]
define weak_odr dso_local spir_kernel void @__sycl_kernel_B(ptr addrspace(1) %0) local_unnamed_addr #1 !kernel_arg_addr_space !5 {
entry:
%1 = tail call spir_func ptr addrspace(3) @__sycl_dynamicLocalMemoryPlaceholder(i64 128) #1
ret void
}

; Function Attrs: convergent norecurse
; CHECK: @__sycl_kernel_C{{.*}} #[[ATTRS]]
; CHECK: @__sycl_kernel_C{{.*}} #[[DYNAMIC_MEM_ATTRS]]
Comment thread
lbushi25 marked this conversation as resolved.
define weak_odr dso_local spir_kernel void @__sycl_kernel_C(ptr addrspace(1) %0) local_unnamed_addr #1 !kernel_arg_addr_space !5 {
entry:
%1 = tail call spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64 128, i64 4) #1
%2 = tail call spir_func ptr addrspace(3) @__sycl_allocateLocalMemoryIndirect() #1
ret void
}

; CHECK-NOT: "sycl-work-group-scratch"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this check-not is always true and can be removed.
; CHECK: @__sycl_kernel_D{{.*}} #[[STATIC_MEM_ATTRS:[0-9]+]] covers what needs to be checked.

; Function Attrs: convergent norecurse
; CHECK: @__sycl_kernel_D{{.*}} #[[STATIC_MEM_ATTRS:[0-9]+]]
define weak_odr dso_local spir_kernel void @__sycl_kernel_D(ptr addrspace(1) %0) local_unnamed_addr #1 !kernel_arg_addr_space !5 {
entry:
%1 = tail call spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64 128, i64 4) #1
ret void
}

; Function Attrs: convergent
define internal spir_func ptr addrspace(3) @__sycl_allocateLocalMemoryIndirect() {
entry:
%1 = tail call spir_func ptr addrspace(3) @__sycl_dynamicLocalMemoryPlaceholder(i64 128) #1
ret ptr addrspace(3) %1
}

; Function Attrs: convergent
declare dso_local spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64, i64) local_unnamed_addr #1

; Function Attrs: convergent
declare dso_local spir_func ptr addrspace(3) @__sycl_dynamicLocalMemoryPlaceholder(i64) local_unnamed_addr #1

; CHECK: #[[ATTRS]] = {{.*}} "sycl-work-group-static"
; CHECK: #[[FULL_DYNAMIC_MEM_ATTRS]] = {{.*}} "sycl-work-group-scratch" "sycl-work-group-static"
; CHECK: #[[DYNAMIC_MEM_ATTRS]] = {{.*}} "sycl-work-group-scratch" "sycl-work-group-static"
; CHECK: #[[STATIC_MEM_ATTRS]] = {{.*}} "sycl-work-group-static"
attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" "sycl-work-group-static"="1" }
attributes #1 = { convergent norecurse }

Comment thread
lbushi25 marked this conversation as resolved.
Expand Down
17 changes: 17 additions & 0 deletions sycl/doc/design/PropertySets.md
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,23 @@ have an implicit local memory argument.
local memory argument.


### [SYCL/work group dynamic local mem]

__Key:__ Kernel name.

__Value type:__ 32 bit integer. ("1")

__Value:__ 1 if the kernel allocates work group scratch memory either directly
or by way of helper functions and 0 or missing otherwise.

__Notes:__

1. If no entry is present for a given kernel in the binary, the kernel does not
allocate work group scratch memory.
2. If this property set is missing, no kernels in the binary allocate work
group scratch memory.


### [SYCL/registered kernels]

__Key:__ "Registered" kernel name.
Expand Down
24 changes: 12 additions & 12 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -864,7 +864,7 @@ class __SYCL_EXPORT handler {
if constexpr (ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
decltype(Wrapper)>::value) {
SetKernelLaunchpropertiesIfNotEmpty(detail::extractKernelProperties(
setKernelLaunchProperties(detail::extractKernelProperties(
Comment thread
lbushi25 marked this conversation as resolved.
Wrapper.get(ext::oneapi::experimental::properties_tag{})));
}

Expand Down Expand Up @@ -895,14 +895,13 @@ class __SYCL_EXPORT handler {
if constexpr (ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
const KernelType &>::value) {
SetKernelLaunchpropertiesIfNotEmpty(
detail::extractKernelProperties<Info.IsESIMD>(
KernelFunc.get(ext::oneapi::experimental::properties_tag{})));
setKernelLaunchProperties(detail::extractKernelProperties<Info.IsESIMD>(
KernelFunc.get(ext::oneapi::experimental::properties_tag{})));
}

#ifndef __SYCL_DEVICE_ONLY__
verifyUsedKernelBundleInternal(Info.Name);
SetKernelLaunchpropertiesIfNotEmpty(
setKernelLaunchProperties(
detail::extractKernelProperties<Info.IsESIMD>(Props));
detail::checkValueRange<Dims>(UserRange);
convertToRangeViewAndSetDescriptor(std::move(UserRange));
Expand Down Expand Up @@ -932,7 +931,7 @@ class __SYCL_EXPORT handler {
setDeviceKernelInfo(std::move(Kernel));
detail::checkValueRange<Dims>(NumWorkItems);
convertToRangeViewAndSetDescriptor(std::move(NumWorkItems));
SetKernelLaunchpropertiesIfNotEmpty(detail::extractKernelProperties(Props));
setKernelLaunchProperties(detail::extractKernelProperties(Props));
extractArgsAndReqs();
#endif
}
Expand All @@ -955,7 +954,7 @@ class __SYCL_EXPORT handler {
setDeviceKernelInfo(std::move(Kernel));
detail::checkValueRange<Dims>(NDRange);
convertToRangeViewAndSetDescriptor(std::move(NDRange));
SetKernelLaunchpropertiesIfNotEmpty(detail::extractKernelProperties(Props));
setKernelLaunchProperties(detail::extractKernelProperties(Props));
extractArgsAndReqs();
#endif
}
Expand All @@ -978,9 +977,8 @@ class __SYCL_EXPORT handler {

if constexpr (ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<const KernelType &>::value) {
SetKernelLaunchpropertiesIfNotEmpty(
detail::extractKernelProperties<Info.IsESIMD>(
KernelFunc.get(ext::oneapi::experimental::properties_tag{})));
setKernelLaunchProperties(detail::extractKernelProperties<Info.IsESIMD>(
KernelFunc.get(ext::oneapi::experimental::properties_tag{})));
}

#ifndef __SYCL_DEVICE_ONLY__
Expand All @@ -999,7 +997,7 @@ class __SYCL_EXPORT handler {
}

StoreLambda<NameT, KernelType, Dims, ElementType>(std::move(KernelFunc));
SetKernelLaunchpropertiesIfNotEmpty(
setKernelLaunchProperties(
detail::extractKernelProperties<Info.IsESIMD>(Props));
#endif
}
Expand Down Expand Up @@ -2856,15 +2854,17 @@ class __SYCL_EXPORT handler {
void setKernelLaunchProperties(
const detail::KernelPropertyHolderStructTy &KernelLaunchProperties);

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
inline constexpr void SetKernelLaunchpropertiesIfNotEmpty(
const detail::KernelPropertyHolderStructTy &KernelLaunchProperties) {
(void)KernelLaunchProperties;

#ifndef __SYCL_DEVICE_ONLY__
if (!KernelLaunchProperties.isEmpty())
setKernelLaunchProperties(KernelLaunchProperties);
#endif
#endif // __SYCL_DEVICE_ONLY
}
#endif // __INTEL_PREVIEW_BREAKING_CHANGES

// Various checks that are only meaningful for host compilation, because they
// result in runtime errors (i.e. exceptions being thrown). To save time
Expand Down
4 changes: 4 additions & 0 deletions sycl/source/detail/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@
#define __SYCL_PROPERTY_SET_SYCL_VIRTUAL_FUNCTIONS "SYCL/virtual functions"
/// PropertySetRegistry::SYCL_IMPLICIT_LOCAL_ARG defined in PropertySetIO.h
#define __SYCL_PROPERTY_SET_SYCL_IMPLICIT_LOCAL_ARG "SYCL/implicit local arg"
/// PropertySetRegistry::SYCL_WORK_GROUP_DYNAMIC_LOCAL_MEM defined in
/// PropertySetIO.h
#define __SYCL_PROPERTY_SET_SYCL_WORK_GROUP_DYNAMIC_LOCAL_MEM \
"SYCL/work group dynamic local mem"
/// PropertySetRegistry::SYCL_REGISTERED_KERNELS defined in PropertySetIO.h
#define __SYCL_PROPERTY_SET_SYCL_REGISTERED_KERNELS "SYCL/registered kernels"

Expand Down
23 changes: 18 additions & 5 deletions sycl/source/detail/device_binary_image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ RTDeviceBinaryImage::RTDeviceBinaryImage(sycl_device_binary Bin) {
DeviceLibMetadata.init(Bin, __SYCL_PROPERTY_SET_DEVICELIB_METADATA);
KernelParamOptInfo.init(Bin, __SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO);
ImplicitLocalArg.init(Bin, __SYCL_PROPERTY_SET_SYCL_IMPLICIT_LOCAL_ARG);
WorkGroupDynamicLocalMem.init(
Bin, __SYCL_PROPERTY_SET_SYCL_WORK_GROUP_DYNAMIC_LOCAL_MEM);
ProgramMetadata.init(Bin, __SYCL_PROPERTY_SET_PROGRAM_METADATA);
// Convert ProgramMetadata into the UR format
for (const auto &Prop : ProgramMetadata) {
Expand Down Expand Up @@ -526,6 +528,11 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage(
naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) {
return Img.getImplicitLocalArg();
});
auto MergedWorkGroupDynamicLocalMem =
naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) {
return Img.getWorkGroupDynamicLocalMem();
});

auto MergedKernelNames =
naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) {
return Img.getKernelNames();
Expand All @@ -539,11 +546,16 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage(
return Img.getRegisteredKernels();
});

std::array<const std::vector<sycl_device_binary_property> *, 9> MergedVecs{
&MergedSpecConstants, &MergedSpecConstantsDefaultValues,
&MergedKernelParamOptInfo, &MergedDeviceGlobals,
&MergedVirtualFunctions, &MergedImplicitLocalArg,
&MergedKernelNames, &MergedExportedSymbols,
std::array<const std::vector<sycl_device_binary_property> *, 10> MergedVecs{
&MergedSpecConstants,
&MergedSpecConstantsDefaultValues,
&MergedKernelParamOptInfo,
&MergedDeviceGlobals,
&MergedVirtualFunctions,
&MergedImplicitLocalArg,
&MergedWorkGroupDynamicLocalMem,
&MergedKernelNames,
&MergedExportedSymbols,
&MergedRegisteredKernels};

// Exclusive merges.
Expand Down Expand Up @@ -662,6 +674,7 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage(
CopyPropertiesVec(MergedDeviceGlobals, DeviceGlobals);
CopyPropertiesVec(MergedVirtualFunctions, VirtualFunctions);
CopyPropertiesVec(MergedImplicitLocalArg, ImplicitLocalArg);
CopyPropertiesVec(MergedWorkGroupDynamicLocalMem, WorkGroupDynamicLocalMem);
CopyPropertiesVec(MergedKernelNames, KernelNames);
CopyPropertiesVec(MergedExportedSymbols, ExportedSymbols);
CopyPropertiesVec(MergedRegisteredKernels, RegisteredKernels);
Expand Down
4 changes: 4 additions & 0 deletions sycl/source/detail/device_binary_image.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,9 @@ class RTDeviceBinaryImage {
}
const PropertyRange &getVirtualFunctions() const { return VirtualFunctions; }
const PropertyRange &getImplicitLocalArg() const { return ImplicitLocalArg; }
const PropertyRange &getWorkGroupDynamicLocalMem() const {
return WorkGroupDynamicLocalMem;
}
const PropertyRange &getRegisteredKernels() const {
return RegisteredKernels;
}
Expand Down Expand Up @@ -263,6 +266,7 @@ class RTDeviceBinaryImage {
RTDeviceBinaryImage::PropertyRange DeviceRequirements;
RTDeviceBinaryImage::PropertyRange VirtualFunctions;
RTDeviceBinaryImage::PropertyRange ImplicitLocalArg;
RTDeviceBinaryImage::PropertyRange WorkGroupDynamicLocalMem;
RTDeviceBinaryImage::PropertyRange RegisteredKernels;
RTDeviceBinaryImage::PropertyRange Misc;

Expand Down
4 changes: 4 additions & 0 deletions sycl/source/detail/device_kernel_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ void DeviceKernelInfo::setImplicitLocalArgPos(int Pos) {
MImplicitLocalArgPos = Pos;
}

void DeviceKernelInfo::setWorkGroupDynamicLocalMem() {
MWorkGroupDynamicLocalMem = true;
}

std::string_view DeviceKernelInfo::getDemangledName() const {
std::call_once(MDemangledNameInitFlag, [&]() {
#ifdef __SYCL_ENABLE_GNU_DEMANGLING
Expand Down
7 changes: 7 additions & 0 deletions sycl/source/detail/device_kernel_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy {
return MImplicitLocalArgPos;
}

bool getWorkGroupDynamicLocalMem() const { return MWorkGroupDynamicLocalMem; }

const sycl::kernel_id &getKernelID() const {
// Expected to be called only for DeviceKernelInfo instances created by
// program manager (as opposed to allocated by sycl::kernel with
Expand All @@ -111,6 +113,10 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy {
// function allows setting it as more images are added.
void setImplicitLocalArgPos(int Pos);

// Indicates that this kernel uses dynamic work group local memory also
// known as work group scratch memory.
void setWorkGroupDynamicLocalMem();

int &getRefCount() { return RefCount; }

// Returns the demangled kernel name, caching the result to avoid repeated
Expand All @@ -122,6 +128,7 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy {

FastKernelSubcacheT MFastKernelSubcache;
std::optional<int> MImplicitLocalArgPos;
bool MWorkGroupDynamicLocalMem = false;
const std::optional<sycl::kernel_id> MKernelID;
// Keeps track of binary image to kernel name reference count.
// Used for checking if the last image referencing the kernel name
Expand Down
4 changes: 4 additions & 0 deletions sycl/source/detail/handler_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,10 @@ class handler_impl {
std::vector<std::shared_ptr<detail::work_group_memory_impl>>
MWorkGroupMemoryObjects;

/// True if the work_group_scratch_size launch property is present for the
/// kernel
bool MHasWorkGroupScratchSizeProperty = false;

/// Potential event mode for the result event of the command.
ext::oneapi::experimental::event_mode_enum MEventMode =
ext::oneapi::experimental::event_mode_enum::none;
Expand Down
Loading
Loading