This repository was archived by the owner on Mar 28, 2023. It is now read-only.
forked from llvm/llvm-test-suite
-
Notifications
You must be signed in to change notification settings - Fork 131
/
Copy pathexisting_local_accessor.cpp
78 lines (63 loc) · 2.43 KB
/
existing_local_accessor.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// UNSUPPORTED: hip
// REQUIRES: fusion
// Test complete fusion with local internalization and a local accessor that
// already exists in one of the input kernels.
#include <sycl/sycl.hpp>
using namespace sycl;
int main() {
constexpr size_t dataSize = 512;
int in1[dataSize], in2[dataSize], in3[dataSize], tmp[dataSize], out[dataSize];
for (size_t i = 0; i < dataSize; ++i) {
in1[i] = i * 2;
in2[i] = i * 3;
in3[i] = i * 4;
tmp[i] = -1;
out[i] = -1;
}
queue q{ext::codeplay::experimental::property::queue::enable_fusion{}};
{
buffer<int> bIn1{in1, range{dataSize}};
buffer<int> bIn2{in2, range{dataSize}};
buffer<int> bIn3{in3, range{dataSize}};
buffer<int> bTmp{tmp, range{dataSize}};
buffer<int> bOut{out, range{dataSize}};
ext::codeplay::experimental::fusion_wrapper fw{q};
fw.start_fusion();
assert(fw.is_in_fusion_mode() && "Queue should be in fusion mode");
q.submit([&](handler &cgh) {
auto accIn1 = bIn1.get_access(cgh);
auto accIn2 = bIn2.get_access(cgh);
auto accTmp = bTmp.get_access(
cgh, sycl::ext::codeplay::experimental::property::promote_local{});
local_accessor<int> accLocal{16, cgh};
cgh.parallel_for<class KernelOne>(
nd_range<1>{{dataSize}, {16}}, [=](nd_item<1> i) {
size_t globalIdx = i.get_global_linear_id();
size_t localIdx = i.get_local_linear_id();
accLocal[localIdx] = accIn2[globalIdx];
accTmp[globalIdx] = accIn1[globalIdx] + accLocal[localIdx];
});
});
q.submit([&](handler &cgh) {
auto accTmp = bTmp.get_access(
cgh, sycl::ext::codeplay::experimental::property::promote_local{});
auto accIn3 = bIn3.get_access(cgh);
auto accOut = bOut.get_access(cgh);
cgh.parallel_for<class KernelTwo>(
nd_range<1>{{dataSize}, {16}},
[=](id<1> i) { accOut[i] = accTmp[i] * accIn3[i]; });
});
fw.complete_fusion({ext::codeplay::experimental::property::no_barriers{}});
assert(!fw.is_in_fusion_mode() &&
"Queue should not be in fusion mode anymore");
}
// Check the results
for (size_t i = 0; i < dataSize; ++i) {
assert(out[i] == (20 * i * i) && "Computation error");
assert(tmp[i] == -1 && "Not internalized");
}
return 0;
}