Skip to content

Commit effbbab

Browse files
authored
[SYCL][Fusion] Avoid deadlock on events with no command group (#12621)
`sycl::queue` shortcut functions, e.g., `sycl::queue::memcpy`, may not create a command group, so events coming from those will not have a command group attached. Avoid deadlock when going through event list in fusion mode. Signed-off-by: Victor Perez <[email protected]>
1 parent c200fe3 commit effbbab

File tree

3 files changed

+100
-1
lines changed

3 files changed

+100
-1
lines changed

sycl/source/detail/scheduler/graph_builder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -963,6 +963,7 @@ Scheduler::GraphBuildResult Scheduler::GraphBuilder::addCG(
963963
for (auto Ev = Events.begin(); Ev != Events.end();) {
964964
auto *EvDepCmd = static_cast<Command *>((*Ev)->getCommand());
965965
if (!EvDepCmd) {
966+
++Ev;
966967
continue;
967968
}
968969
// Handle event dependencies on any commands part of another active

sycl/test-e2e/KernelFusion/Reduction/group_reduce_and_last_wg_detection.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
// RUN: %{build} -fsycl-embed-ir -o %t.out
22
// RUN: %{run} %t.out
3-
// UNSUPPORTED: hip || cuda
43

54
// COM: When ran on HIP and CUDA, this algorithm launches 'memcpy' commands
65
// leading to an infinite loop due to a bug in kernel fusion.
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
// RUN: %{build} -fsycl-embed-ir -o %t.out
2+
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 \
3+
// RUN: | FileCheck %s --implicit-check-not=ERROR
4+
5+
// Test fusion with queue shortcut functions being involved.
6+
7+
#include <sycl/sycl.hpp>
8+
9+
using namespace sycl;
10+
11+
template <int FusionStartPoint, int KernelNum> class Kernel;
12+
13+
template <int FusionStartPoint> void test() {
14+
static_assert(0 <= FusionStartPoint && FusionStartPoint < 3,
15+
"Invalid fusion start point");
16+
17+
constexpr size_t size = 1024;
18+
constexpr float value = 10;
19+
queue q{ext::codeplay::experimental::property::queue::enable_fusion{}};
20+
std::array<float, size> h;
21+
h.fill(0);
22+
auto *ptr0 = sycl::malloc_device<float>(size, q);
23+
auto *ptr1 = sycl::malloc_device<float>(size, q);
24+
25+
{
26+
range<1> r{size};
27+
28+
ext::codeplay::experimental::fusion_wrapper fw{q};
29+
30+
if constexpr (FusionStartPoint == 0) {
31+
fw.start_fusion();
32+
}
33+
34+
// ptr0(x) = value
35+
auto e0 = q.parallel_for<Kernel<FusionStartPoint, 0>>(
36+
r, [=](sycl::id<1> i) { ptr0[i] = value; });
37+
// ptr1(x) = value / 2
38+
auto e1 = q.parallel_for<Kernel<FusionStartPoint, 1>>(
39+
r, [=](sycl::id<1> i) { ptr1[i] = value / 2; });
40+
41+
if constexpr (FusionStartPoint == 1) {
42+
fw.start_fusion();
43+
}
44+
45+
// ptr0(x) = value / 2 if x < size / 2 else value
46+
auto e2 = q.memcpy(ptr0, ptr1, sizeof(float) * size / 2, {e0, e1});
47+
48+
if constexpr (FusionStartPoint == 2) {
49+
fw.start_fusion();
50+
}
51+
52+
// ptr0(x) = value / 2 + 1 if x < size / 2 else value + 1
53+
auto e3 = q.parallel_for<Kernel<FusionStartPoint, 2>>(
54+
r, e2, [=](sycl::id<1> i) { ptr0[i]++; });
55+
56+
fw.complete_fusion({ext::codeplay::experimental::property::no_barriers{}});
57+
58+
// Copyback
59+
q.memcpy(h.data(), ptr0, sizeof(float) * size, e3).wait();
60+
}
61+
62+
sycl::free(ptr0, q);
63+
sycl::free(ptr1, q);
64+
65+
assert(std::all_of(h.begin(), h.begin() + size / 2,
66+
[=](float f) { return f == value / 2 + 1; }) &&
67+
"ERROR");
68+
assert(std::all_of(h.begin() + size / 2, h.end(),
69+
[=](float f) { return f == value + 1; }) &&
70+
"ERROR");
71+
}
72+
73+
int main() {
74+
std::cerr << "FusionStartPoint = 0:\n";
75+
// COM: memcpy leads to a CG being created as it depends on CGs not producing
76+
// a PI event (coming from the CGs to be fused), so not safe to bypass. Fusion
77+
// should be cancelled as a dependency with an event to be fused is found.
78+
79+
// CHECK: FusionStartPoint = 0:
80+
// CHECK-NEXT: WARNING: Not fusing 'copy usm' command group. Can only fuse device kernel command groups.
81+
// CHECK-NEXT: WARNING: Aborting fusion because synchronization with one of the kernels in the fusion list was requested
82+
test<0>();
83+
84+
std::cerr << "FusionStartPoint = 1:\n";
85+
// COM: memcpy does not create CG, memory manager handles the operation
86+
// instead. As no dependency with a CG to be fused is found, events are issued
87+
// as usual and fusion takes place.
88+
89+
// CHECK-NEXT: FusionStartPoint = 1:
90+
// CHECK-NEXT: JIT DEBUG: Compiling new kernel, no suitable cached kernel found
91+
test<1>();
92+
93+
std::cerr << "FusionStartPoint = 2:\n";
94+
// COM: Same as above.
95+
96+
// CHECK-NEXT: FusionStartPoint = 2:
97+
// CHECK-NEXT: JIT DEBUG: Compiling new kernel, no suitable cached kernel found
98+
test<2>();
99+
}

0 commit comments

Comments
 (0)