Skip to content

Commit c416be1

Browse files
authored
Merge pull request #338 from guilhermeAlmeida1/fixIntelGPUCompatibilityBarrier
Remove return in CCL_kernel
2 parents 36fbe79 + 07fe7e9 commit c416be1

File tree

1 file changed

+2
-7
lines changed

1 file changed

+2
-7
lines changed

device/sycl/src/clusterization/clusterization_algorithm.sycl

+2-7
Original file line numberDiff line numberDiff line change
@@ -254,8 +254,8 @@ class ccl_kernel {
254254
unsigned char adjc[MAX_CELLS_PER_THREAD];
255255

256256
// It seems that SYCL runs into undefined behaviour when calling
257-
// any_of_group when some threads have already run into a return. So can
258-
// only do this after running the FastSV algorithm.
257+
// group synchronisation functions when some threads have already run
258+
// into a return. As such, we cannot use returns in this kernel.
259259

260260
#pragma unroll
261261
for (index_t tst = 0; tst < MAX_CELLS_PER_THREAD; ++tst) {
@@ -295,11 +295,6 @@ class ccl_kernel {
295295

296296
item.barrier();
297297

298-
// Now that we can use return, check if any work needs to be done
299-
if (tid >= size) {
300-
return;
301-
}
302-
303298
/*
304299
* Count the number of clusters by checking how many cells have
305300
* themselves assigned as a parent.

0 commit comments

Comments
 (0)