@@ -208,25 +208,17 @@ def benchmarks(self) -> list[Benchmark]:
208208 measure_completion_time ,
209209 use_events ,
210210 ) in submit_graph_params :
211- # Non-sycl runtimes have to be run with emulated graphs,
212- # see: https://github.com/intel/compute-benchmarks/commit/d81d5d602739482b9070c872a28c0b5ebb41de70
213- emulate_graphs = (
214- 0 if runtime in (RUNTIMES .SYCL , RUNTIMES .SYCL_PREVIEW ) else 1
215- )
216- benches .append (
217- GraphApiSubmitGraph (
218- self ,
219- runtime ,
220- in_order_queue ,
221- num_kernels ,
222- measure_completion_time ,
223- use_events ,
224- emulate_graphs ,
225- useHostTasks = 0 ,
226- )
227- )
228- if runtime == RUNTIMES .SYCL :
229- # Create CPU count variant
211+ # SYCL supports only graph mode; UR supports only emulation with command buffers;
212+ # L0 supports both modes, via its graph and command list APIs.
213+ if runtime == RUNTIMES .SYCL or runtime == RUNTIMES .SYCL_PREVIEW :
214+ emulate_graphs = [0 ]
215+ elif runtime == RUNTIMES .UR :
216+ emulate_graphs = [1 ]
217+ else : # level-zero
218+ # SubmitGraph with L0 graph segfaults on PVC
219+ device_arch = getattr (options , "device_architecture" , "" )
220+ emulate_graphs = [1 ] if "pvc" in device_arch else [0 , 1 ]
221+ for emulate_graph in emulate_graphs :
230222 benches .append (
231223 GraphApiSubmitGraph (
232224 self ,
@@ -235,11 +227,25 @@ def benchmarks(self) -> list[Benchmark]:
235227 num_kernels ,
236228 measure_completion_time ,
237229 use_events ,
238- emulate_graphs ,
230+ emulate_graph ,
239231 useHostTasks = 0 ,
240- profiler_type = PROFILERS .CPU_COUNTER ,
241232 )
242233 )
234+ if runtime == RUNTIMES .SYCL :
235+ # Create CPU count variant
236+ benches .append (
237+ GraphApiSubmitGraph (
238+ self ,
239+ runtime ,
240+ in_order_queue ,
241+ num_kernels ,
242+ measure_completion_time ,
243+ use_events ,
244+ emulate_graph ,
245+ useHostTasks = 0 ,
246+ profiler_type = PROFILERS .CPU_COUNTER ,
247+ )
248+ )
243249
244250 # Add other benchmarks
245251 benches += [
@@ -1178,6 +1184,7 @@ def __init__(
11781184 self ._use_events = useEvents
11791185 self ._use_host_tasks = useHostTasks
11801186 self ._emulate_graphs = emulate_graphs
1187+ self ._emulate_str = " with graph emulation" if self ._emulate_graphs else ""
11811188 self ._ioq_str = "in order" if self ._in_order_queue else "out of order"
11821189 self ._measure_str = (
11831190 " with measure completion" if self ._measure_completion_time else ""
@@ -1196,10 +1203,10 @@ def __init__(
11961203 )
11971204
# Builds the string returned as this benchmark's name from the runtime value,
# the events / host-tasks suffix strings, the kernel count, the in-order-queue
# flag, the measure-completion flag and the result of self._cpu_count_str().
# The added (+) line additionally inserts self._emulate_str, so a variant
# created with a truthy emulate_graphs gets a name distinct from the
# non-emulated variant with otherwise identical parameters.
11981205 def name (self ):
1199- return f"graph_api_benchmark_{ self ._runtime .value } SubmitGraph{ self ._use_events_str } { self ._host_tasks_str } numKernels:{ self ._num_kernels } ioq { self ._in_order_queue } measureCompletion { self ._measure_completion_time } { self ._cpu_count_str ()} "
1206+ return f"graph_api_benchmark_{ self ._runtime .value } SubmitGraph{ self ._use_events_str } { self ._host_tasks_str } { self . _emulate_str } numKernels:{ self ._num_kernels } ioq { self ._in_order_queue } measureCompletion { self ._measure_completion_time } { self ._cpu_count_str ()} "
12001207
# Builds the display label: upper-cased runtime value, "SubmitGraph", the
# queue-order / measure-completion / events / host-tasks strings, the kernel
# count, and self._cpu_count_str(separator=',').
# The added (+) line also inserts self._emulate_str (" with graph emulation"
# when emulate_graphs is truthy, otherwise empty) before the kernel count.
12011208 def display_name (self ) -> str :
1202- return f"{ self ._runtime .value .upper ()} SubmitGraph { self ._ioq_str } { self ._measure_str } { self ._use_events_str } { self ._host_tasks_str } , { self ._num_kernels } kernels{ self ._cpu_count_str (separator = ',' )} "
1209+ return f"{ self ._runtime .value .upper ()} SubmitGraph { self ._ioq_str } { self ._measure_str } { self ._use_events_str } { self ._host_tasks_str } { self . _emulate_str } , { self ._num_kernels } kernels{ self ._cpu_count_str (separator = ',' )} "
12031210
# Builds the group label from the queue-order / measure-completion / events /
# host-tasks strings, the kernel count, and self._cpu_count_str(separator=',').
# The runtime value is not part of this label.
# NOTE(review): unlike name() and display_name() in this change, this label
# does not include self._emulate_str, so variants that differ only in graph
# emulation produce the same group label. Confirm that this is intended.
12041211 def explicit_group (self ):
12051212 return f"SubmitGraph { self ._ioq_str } { self ._measure_str } { self ._use_events_str } { self ._host_tasks_str } , { self ._num_kernels } kernels{ self ._cpu_count_str (separator = ',' )} "
0 commit comments