@@ -151,8 +151,7 @@ queue_impl::getExtendDependencyList(const std::vector<event> &DepEvents,
151151 return MutableVec;
152152}
153153
154- event queue_impl::memset (const std::shared_ptr<detail::queue_impl> &Self,
155- void *Ptr, int Value, size_t Count,
154+ event queue_impl::memset (void *Ptr, int Value, size_t Count,
156155 const std::vector<event> &DepEvents,
157156 bool CallerNeedsEvent) {
158157#if XPTI_ENABLE_INSTRUMENTATION
@@ -180,7 +179,7 @@ event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
180179#endif
181180 const std::vector<unsigned char > Pattern{static_cast <unsigned char >(Value)};
182181 return submitMemOpHelper (
183- Self, DepEvents, CallerNeedsEvent,
182+ DepEvents, CallerNeedsEvent,
184183 [&](handler &CGH ) { CGH .memset (Ptr, Value, Count); },
185184 MemoryManager::fill_usm, Ptr, *this , Count, Pattern);
186185}
@@ -198,8 +197,7 @@ void report(const code_location &CodeLoc) {
198197 std::cout << ' \n ' ;
199198}
200199
201- event queue_impl::memcpy (const std::shared_ptr<detail::queue_impl> &Self,
202- void *Dest, const void *Src, size_t Count,
200+ event queue_impl::memcpy (void *Dest, const void *Src, size_t Count,
203201 const std::vector<event> &DepEvents,
204202 bool CallerNeedsEvent, const code_location &CodeLoc) {
205203#if XPTI_ENABLE_INSTRUMENTATION
@@ -231,28 +229,28 @@ event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
231229 " NULL pointer argument in memory copy operation." );
232230 }
233231 return submitMemOpHelper (
234- Self, DepEvents, CallerNeedsEvent,
232+ DepEvents, CallerNeedsEvent,
235233 [&](handler &CGH ) { CGH .memcpy (Dest, Src, Count); },
236234 MemoryManager::copy_usm, Src, *this , Count, Dest);
237235}
238236
239- event queue_impl::mem_advise (const std::shared_ptr<detail::queue_impl> &Self,
240- const void *Ptr, size_t Length,
237+ event queue_impl::mem_advise (const void *Ptr, size_t Length,
241238 ur_usm_advice_flags_t Advice,
242239 const std::vector<event> &DepEvents,
243240 bool CallerNeedsEvent) {
244241 return submitMemOpHelper (
245- Self, DepEvents, CallerNeedsEvent,
242+ DepEvents, CallerNeedsEvent,
246243 [&](handler &CGH ) { CGH .mem_advise (Ptr, Length, Advice); },
247244 MemoryManager::advise_usm, Ptr, *this , Length, Advice);
248245}
249246
250- event queue_impl::memcpyToDeviceGlobal (
251- const std::shared_ptr<detail::queue_impl> &Self, void *DeviceGlobalPtr,
252- const void *Src, bool IsDeviceImageScope, size_t NumBytes, size_t Offset,
253- const std::vector<event> &DepEvents, bool CallerNeedsEvent) {
247+ event queue_impl::memcpyToDeviceGlobal (void *DeviceGlobalPtr, const void *Src,
248+ bool IsDeviceImageScope, size_t NumBytes,
249+ size_t Offset,
250+ const std::vector<event> &DepEvents,
251+ bool CallerNeedsEvent) {
254252 return submitMemOpHelper (
255- Self, DepEvents, CallerNeedsEvent,
253+ DepEvents, CallerNeedsEvent,
256254 [&](handler &CGH ) {
257255 CGH .memcpyToDeviceGlobal (DeviceGlobalPtr, Src, IsDeviceImageScope,
258256 NumBytes, Offset);
@@ -261,12 +259,14 @@ event queue_impl::memcpyToDeviceGlobal(
261259 *this , NumBytes, Offset, Src);
262260}
263261
264- event queue_impl::memcpyFromDeviceGlobal (
265- const std::shared_ptr<detail::queue_impl> &Self, void *Dest,
266- const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes,
267- size_t Offset, const std::vector<event> &DepEvents, bool CallerNeedsEvent) {
262+ event queue_impl::memcpyFromDeviceGlobal (void *Dest,
263+ const void *DeviceGlobalPtr,
264+ bool IsDeviceImageScope,
265+ size_t NumBytes, size_t Offset,
266+ const std::vector<event> &DepEvents,
267+ bool CallerNeedsEvent) {
268268 return submitMemOpHelper (
269- Self, DepEvents, CallerNeedsEvent,
269+ DepEvents, CallerNeedsEvent,
270270 [&](handler &CGH ) {
271271 CGH .memcpyFromDeviceGlobal (Dest, DeviceGlobalPtr, IsDeviceImageScope,
272272 NumBytes, Offset);
@@ -275,8 +275,7 @@ event queue_impl::memcpyFromDeviceGlobal(
275275 IsDeviceImageScope, *this , NumBytes, Offset, Dest);
276276}
277277
278- sycl::detail::optional<event>
279- queue_impl::getLastEvent (const std::shared_ptr<queue_impl> &Self) {
278+ sycl::detail::optional<event> queue_impl::getLastEvent () {
280279 // The external event is required to finish last if set, so it is considered
281280 // the last event if present.
282281 if (std::optional<event> ExternalEvent = MInOrderExternalEvent.read ())
@@ -291,7 +290,7 @@ queue_impl::getLastEvent(const std::shared_ptr<queue_impl> &Self) {
291290 if (LastEvent)
292291 return detail::createSyclObjFromImpl<event>(LastEvent);
293292 // We insert a marker to represent an event at end.
294- return detail::createSyclObjFromImpl<event>(insertMarkerEvent (Self ));
293+ return detail::createSyclObjFromImpl<event>(insertMarkerEvent ());
295294}
296295
297296void queue_impl::addEvent (const detail::EventImplPtr &EventImpl) {
@@ -307,16 +306,18 @@ void queue_impl::addEvent(const detail::EventImplPtr &EventImpl) {
307306
308307detail::EventImplPtr
309308queue_impl::submit_impl (const detail::type_erased_cgfo_ty &CGF ,
310- const std::shared_ptr<queue_impl> &Self,
311309 queue_impl *SecondaryQueue, bool CallerNeedsEvent,
312310 const detail::code_location &Loc, bool IsTopCodeLoc,
313311 const v1::SubmissionInfo &SubmitInfo) {
314312#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
315313 detail::handler_impl HandlerImplVal (SecondaryQueue, CallerNeedsEvent);
316314 detail::handler_impl *HandlerImpl = &HandlerImplVal;
315+ // Inlining `Self` results in a crash when SYCL RT is built using MSVC with
316+ // optimizations enabled. No crash if built using OneAPI.
317+ auto Self = shared_from_this ();
317318 handler Handler (HandlerImpl, Self);
318319#else
319- handler Handler (Self , SecondaryQueue, CallerNeedsEvent);
320+ handler Handler (shared_from_this () , SecondaryQueue, CallerNeedsEvent);
320321 auto &HandlerImpl = detail::getSyclObjImpl (Handler);
321322#endif
322323
@@ -398,9 +399,8 @@ queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
398399 Stream->generateFlushCommand (ServiceCGH);
399400 };
400401 detail::type_erased_cgfo_ty CGF {L};
401- detail::EventImplPtr FlushEvent =
402- submit_impl (CGF , Self, SecondaryQueue, /* CallerNeedsEvent*/ true , Loc,
403- IsTopCodeLoc, {});
402+ detail::EventImplPtr FlushEvent = submit_impl (
403+ CGF , SecondaryQueue, /* CallerNeedsEvent*/ true , Loc, IsTopCodeLoc, {});
404404 if (EventImpl)
405405 EventImpl->attachEventToCompleteWeak (FlushEvent);
406406 registerStreamServiceEvent (FlushEvent);
@@ -412,19 +412,17 @@ queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
412412#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
413413detail::EventImplPtr
414414queue_impl::submit_impl (const detail::type_erased_cgfo_ty &CGF ,
415- const std::shared_ptr<queue_impl> &Self,
416- const std::shared_ptr<queue_impl> &,
415+ const std::shared_ptr<queue_impl> & /* PrimaryQueue*/ ,
417416 const std::shared_ptr<queue_impl> &SecondaryQueue,
418417 bool CallerNeedsEvent, const detail::code_location &Loc,
419418 bool IsTopCodeLoc, const SubmissionInfo &SubmitInfo) {
420- return submit_impl (CGF , Self, SecondaryQueue.get (), CallerNeedsEvent, Loc,
419+ return submit_impl (CGF , SecondaryQueue.get (), CallerNeedsEvent, Loc,
421420 IsTopCodeLoc, SubmitInfo);
422421}
423422#endif
424423
425424template <typename HandlerFuncT>
426- event queue_impl::submitWithHandler (const std::shared_ptr<queue_impl> &Self,
427- const std::vector<event> &DepEvents,
425+ event queue_impl::submitWithHandler (const std::vector<event> &DepEvents,
428426 bool CallerNeedsEvent,
429427 HandlerFuncT HandlerFunc) {
430428 v1::SubmissionInfo SI {};
@@ -435,17 +433,16 @@ event queue_impl::submitWithHandler(const std::shared_ptr<queue_impl> &Self,
435433 detail::type_erased_cgfo_ty CGF {L};
436434
437435 if (!CallerNeedsEvent && supportsDiscardingPiEvents ()) {
438- submit_without_event (CGF , Self, SI ,
436+ submit_without_event (CGF , SI ,
439437 /* CodeLoc*/ {}, /* IsTopCodeLoc*/ true );
440438 return createDiscardedEvent ();
441439 }
442- return submit_with_event (CGF , Self, SI ,
440+ return submit_with_event (CGF , SI ,
443441 /* CodeLoc*/ {}, /* IsTopCodeLoc*/ true );
444442}
445443
446444template <typename HandlerFuncT, typename MemOpFuncT, typename ... MemOpArgTs>
447- event queue_impl::submitMemOpHelper (const std::shared_ptr<queue_impl> &Self,
448- const std::vector<event> &DepEvents,
445+ event queue_impl::submitMemOpHelper (const std::vector<event> &DepEvents,
449446 bool CallerNeedsEvent,
450447 HandlerFuncT HandlerFunc,
451448 MemOpFuncT MemOpFunc,
@@ -475,7 +472,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
475472 return createDiscardedEvent ();
476473 }
477474
478- event ResEvent = prepareSYCLEventAssociatedWithQueue (Self );
475+ event ResEvent = prepareSYCLEventAssociatedWithQueue (shared_from_this () );
479476 const auto &EventImpl = detail::getSyclObjImpl (ResEvent);
480477 {
481478 NestedCallsTracker tracker;
@@ -508,7 +505,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
508505 return ResEvent;
509506 }
510507 }
511- return submitWithHandler (Self, DepEvents, CallerNeedsEvent, HandlerFunc);
508+ return submitWithHandler (DepEvents, CallerNeedsEvent, HandlerFunc);
512509}
513510
514511void *queue_impl::instrumentationProlog (const detail::code_location &CodeLoc,
0 commit comments