@@ -196,9 +196,11 @@ def benchmarks(self) -> list[Benchmark]:
196196 # Add UR-specific benchmarks
197197 if options .ur is not None :
198198 benches += [
199- MemcpyExecute (self , 400 , 1 , 102400 , 10 , 1 , 1 , 1 ),
200- MemcpyExecute (self , 400 , 1 , 102400 , 10 , 0 , 1 , 1 ),
201- MemcpyExecute (self , 4096 , 4 , 1024 , 10 , 0 , 1 , 0 ),
199+ MemcpyExecute (self , 400 , 1 , 102400 , 10 , 1 , 1 , 1 , 1 ),
200+ MemcpyExecute (self , 400 , 1 , 102400 , 10 , 0 , 1 , 1 , 1 ),
201+ MemcpyExecute (self , 100 , 4 , 102400 , 10 , 1 , 1 , 0 , 1 ),
202+ MemcpyExecute (self , 100 , 4 , 102400 , 10 , 1 , 1 , 0 , 0 ),
203+ MemcpyExecute (self , 4096 , 4 , 1024 , 10 , 0 , 1 , 0 , 1 ),
202204 UsmMemoryAllocation (self , RUNTIMES .UR , "Device" , 256 , "Both" ),
203205 UsmMemoryAllocation (self , RUNTIMES .UR , "Device" , 256 * 1024 , "Both" ),
204206 UsmBatchMemoryAllocation (self , RUNTIMES .UR , "Device" , 128 , 256 , "Both" ),
@@ -538,6 +540,7 @@ def __init__(
538540 srcUSM ,
539541 dstUSM ,
540542 useEvent ,
543+ useCopyOffload ,
541544 ):
542545 self .numOpsPerThread = numOpsPerThread
543546 self .numThreads = numThreads
@@ -546,22 +549,31 @@ def __init__(
546549 self .srcUSM = srcUSM
547550 self .dstUSM = dstUSM
548551 self .useEvents = useEvent
552+ self .useCopyOffload = useCopyOffload
549553 super ().__init__ (bench , "multithread_benchmark_ur" , "MemcpyExecute" )
550554
def extra_env_vars(self) -> dict:
    """Return the extra environment variables for this benchmark.

    Returns:
        An empty dict when ``self.useCopyOffload`` is truthy; otherwise a
        dict that sets ``UR_L0_V2_FORCE_DISABLE_COPY_OFFLOAD`` to ``"1"``.
    """
    # NOTE(review): judging by its name, the variable force-disables copy
    # offload in the runtime adapter -- confirm against the adapter docs.
    if self.useCopyOffload:
        return {}
    return {"UR_L0_V2_FORCE_DISABLE_COPY_OFFLOAD": "1"}
560+
def name(self):
    """Build the display name of this benchmark from its configuration.

    The base name lists the ops per thread, thread count, allocation size
    and the source/destination USM selectors. " without events" and
    " without copy offload" are appended when the respective flag is falsy.
    """
    # NOTE(review): the spacing before the commas and the trailing space are
    # kept exactly as found; the name may serve as a stable identifier, so
    # confirm before normalizing it.
    base = (
        "multithread_benchmark_ur MemcpyExecute "
        f"opsPerThread:{self.numOpsPerThread} , "
        f"numThreads:{self.numThreads} , "
        f"allocSize:{self.allocSize} "
        f"srcUSM:{self.srcUSM} "
        f"dstUSM:{self.dstUSM} "
    )

    suffixes = []
    if not self.useEvents:
        suffixes.append(" without events")
    if not self.useCopyOffload:
        suffixes.append(" without copy offload")

    return base + "".join(suffixes)
556567
def description(self) -> str:
    """Return a one-sentence, human-readable summary of this benchmark.

    The sentence reports the thread count, operations per thread, the
    allocation size in bytes, the source and destination memory kinds, and
    whether events and driver copy offload are used.

    Returns:
        The description string, ending with a period.
    """
    # A USM selector of exactly 1 is reported as "device"; any other value
    # is reported as "host".
    src_type = "device" if self.srcUSM == 1 else "host"
    dst_type = "device" if self.dstUSM == 1 else "host"
    events = "with" if self.useEvents else "without"
    copy_offload = "with" if self.useCopyOffload else "without"
    # The two trailing clauses are joined with "and" and the sentence ends
    # directly with the period, so the text reads as one proper sentence.
    return (
        f"Measures multithreaded memory copy performance with {self.numThreads} threads "
        f"each performing {self.numOpsPerThread} operations on {self.allocSize} bytes "
        f"from {src_type} to {dst_type} memory {events} events "
        f"and {copy_offload} driver copy offload."
    )
566578
567579 def get_tags (self ):
0 commit comments