4040#include < dlfcn.h>
4141#endif
4242
43- namespace
44- {
45- #if defined( ORO_PRECOMPILED )
46- constexpr auto useBitCode = true ;
43+ #if defined( ORO_PRECOMPILED ) && defined( ORO_PP_LOAD_FROM_STRING )
44+ #include < ParallelPrimitives/cache/oro_compiled_kernels.h> // generate this header with 'convert_binary_to_array.py'
4745#else
48- constexpr auto useBitCode = false ;
46+ const unsigned char oro_compiled_kernels_h[] = " " ;
47+ const size_t oro_compiled_kernels_h_size = 0 ;
4948#endif
5049
51- #if defined( ORO_PP_LOAD_FROM_STRING )
52- constexpr auto useBakeKernel = true ;
53- #else
54- constexpr auto useBakeKernel = false ;
55- static const char * hip_RadixSortKernels = nullptr ;
56- namespace hip
50+ namespace
5751{
58- static const char ** RadixSortKernelsArgs = nullptr ;
59- static const char ** RadixSortKernelsIncludes = nullptr ;
60- } // namespace hip
52+
53+ // When both ORO_PRECOMPILED and ORO_PP_LOAD_FROM_STRING are defined, the 'usePrecompiledAndBakedKernel' mode is activated.
54+ #if defined( ORO_PRECOMPILED ) && defined( ORO_PP_LOAD_FROM_STRING )
55+
56+ // this flag means that we bake the precompiled kernels
57+ constexpr auto usePrecompiledAndBakedKernel = true ;
58+
59+ constexpr auto useBitCode = false ;
60+ constexpr auto useBakeKernel = false ;
61+
62+ #else
63+
64+ constexpr auto usePrecompiledAndBakedKernel = false ;
65+
66+ #if defined( ORO_PRECOMPILED )
67+ constexpr auto useBitCode = true ; // this flag means we use the bitcode file
68+ #else
69+ constexpr auto useBitCode = false ;
70+ #endif
71+
72+ #if defined( ORO_PP_LOAD_FROM_STRING )
73+ constexpr auto useBakeKernel = true ; // this flag means we use the HIP source code embedded in the binary ( as a string )
74+ #else
75+ constexpr auto useBakeKernel = false ;
76+ static const char * hip_RadixSortKernels = nullptr ;
77+ namespace hip
78+ {
79+ static const char ** RadixSortKernelsArgs = nullptr ;
80+ static const char ** RadixSortKernelsIncludes = nullptr ;
81+ } // namespace hip
82+ #endif
83+
6184#endif
6285
6386static_assert ( !( useBitCode && useBakeKernel ), " useBitCode and useBakeKernel cannot coexist" );
@@ -211,9 +234,14 @@ void RadixSort::compileKernels( const std::string& kernelPath, const std::string
211234 opts.push_back ( sort_block_size_param.c_str () );
212235 opts.push_back ( sort_num_warps_param.c_str () );
213236
237+
214238 for ( const auto & record : records )
215239 {
216- if constexpr ( useBakeKernel )
240+ if constexpr ( usePrecompiledAndBakedKernel )
241+ {
242+ oroFunctions[record.kernelType ] = m_oroutils.getFunctionFromPrecompiledBinary_asData (oro_compiled_kernels_h, oro_compiled_kernels_h_size, record.kernelName .c_str () );
243+ }
244+ else if constexpr ( useBakeKernel )
217245 {
218246 oroFunctions[record.kernelType ] = m_oroutils.getFunctionFromString ( m_device, hip_RadixSortKernels, currentKernelPath.c_str (), record.kernelName .c_str (), &opts, 1 , hip::RadixSortKernelsArgs, hip::RadixSortKernelsIncludes );
219247 }
@@ -231,6 +259,8 @@ void RadixSort::compileKernels( const std::string& kernelPath, const std::string
231259 printKernelInfo ( record.kernelName , oroFunctions[record.kernelType ] );
232260 }
233261 }
262+
263+ return ;
234264}
235265
236266int RadixSort::calculateWGsToExecute ( const int blockSize ) const noexcept
0 commit comments