Skip to content

Commit 1472900

Browse files
authored
[Benchmarks] Add level-zero record-and-replay to set of SubmitGraph benchmarks (#20641)
The Level Zero `SubmitGraph` benchmarks support two modes: EmulateGraphs=0 (the L0 record-and-replay graph APIs) and EmulateGraphs=1 (submitting a command list to an immediate command list). This PR adds the record-and-replay variant to the set of compute benchmarks.
1 parent 458202a commit 1472900

File tree

1 file changed

+30
-23
lines changed

1 file changed

+30
-23
lines changed

devops/scripts/benchmarks/benches/compute.py

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -208,25 +208,17 @@ def benchmarks(self) -> list[Benchmark]:
208208
measure_completion_time,
209209
use_events,
210210
) in submit_graph_params:
211-
# Non-sycl runtimes have to be run with emulated graphs,
212-
# see: https://github.com/intel/compute-benchmarks/commit/d81d5d602739482b9070c872a28c0b5ebb41de70
213-
emulate_graphs = (
214-
0 if runtime in (RUNTIMES.SYCL, RUNTIMES.SYCL_PREVIEW) else 1
215-
)
216-
benches.append(
217-
GraphApiSubmitGraph(
218-
self,
219-
runtime,
220-
in_order_queue,
221-
num_kernels,
222-
measure_completion_time,
223-
use_events,
224-
emulate_graphs,
225-
useHostTasks=0,
226-
)
227-
)
228-
if runtime == RUNTIMES.SYCL:
229-
# Create CPU count variant
211+
# SYCL only supports graph mode, UR supports only emulation with command buffers,
212+
# and L0 supports both modes via graph and command list APIs.
213+
if runtime == RUNTIMES.SYCL or runtime == RUNTIMES.SYCL_PREVIEW:
214+
emulate_graphs = [0]
215+
elif runtime == RUNTIMES.UR:
216+
emulate_graphs = [1]
217+
else: # level-zero
218+
# SubmitGraph with L0 graph segfaults on PVC
219+
device_arch = getattr(options, "device_architecture", "")
220+
emulate_graphs = [1] if "pvc" in device_arch else [0, 1]
221+
for emulate_graph in emulate_graphs:
230222
benches.append(
231223
GraphApiSubmitGraph(
232224
self,
@@ -235,11 +227,25 @@ def benchmarks(self) -> list[Benchmark]:
235227
num_kernels,
236228
measure_completion_time,
237229
use_events,
238-
emulate_graphs,
230+
emulate_graph,
239231
useHostTasks=0,
240-
profiler_type=PROFILERS.CPU_COUNTER,
241232
)
242233
)
234+
if runtime == RUNTIMES.SYCL:
235+
# Create CPU count variant
236+
benches.append(
237+
GraphApiSubmitGraph(
238+
self,
239+
runtime,
240+
in_order_queue,
241+
num_kernels,
242+
measure_completion_time,
243+
use_events,
244+
emulate_graph,
245+
useHostTasks=0,
246+
profiler_type=PROFILERS.CPU_COUNTER,
247+
)
248+
)
243249

244250
# Add other benchmarks
245251
benches += [
@@ -1178,6 +1184,7 @@ def __init__(
11781184
self._use_events = useEvents
11791185
self._use_host_tasks = useHostTasks
11801186
self._emulate_graphs = emulate_graphs
1187+
self._emulate_str = " with graph emulation" if self._emulate_graphs else ""
11811188
self._ioq_str = "in order" if self._in_order_queue else "out of order"
11821189
self._measure_str = (
11831190
" with measure completion" if self._measure_completion_time else ""
@@ -1196,10 +1203,10 @@ def __init__(
11961203
)
11971204

11981205
def name(self):
1199-
return f"graph_api_benchmark_{self._runtime.value} SubmitGraph{self._use_events_str}{self._host_tasks_str} numKernels:{self._num_kernels} ioq {self._in_order_queue} measureCompletion {self._measure_completion_time}{self._cpu_count_str()}"
1206+
return f"graph_api_benchmark_{self._runtime.value} SubmitGraph{self._use_events_str}{self._host_tasks_str}{self._emulate_str} numKernels:{self._num_kernels} ioq {self._in_order_queue} measureCompletion {self._measure_completion_time}{self._cpu_count_str()}"
12001207

12011208
def display_name(self) -> str:
1202-
return f"{self._runtime.value.upper()} SubmitGraph {self._ioq_str}{self._measure_str}{self._use_events_str}{self._host_tasks_str}, {self._num_kernels} kernels{self._cpu_count_str(separator=',')}"
1209+
return f"{self._runtime.value.upper()} SubmitGraph {self._ioq_str}{self._measure_str}{self._use_events_str}{self._host_tasks_str}{self._emulate_str}, {self._num_kernels} kernels{self._cpu_count_str(separator=',')}"
12031210

12041211
def explicit_group(self):
12051212
return f"SubmitGraph {self._ioq_str}{self._measure_str}{self._use_events_str}{self._host_tasks_str}, {self._num_kernels} kernels{self._cpu_count_str(separator=',')}"

0 commit comments

Comments
 (0)