This repository was archived by the owner on Mar 28, 2023. It is now read-only.
forked from llvm/llvm-test-suite
-
Notifications
You must be signed in to change notification settings - Fork 131
/
Copy pathlocal_internalization.cpp
73 lines (58 loc) · 2.14 KB
/
local_internalization.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// UNSUPPORTED: hip
// REQUIRES: fusion
// Test complete fusion with local internalization specified on the
// accessors.
#include <sycl/sycl.hpp>
using namespace sycl;
int main() {
constexpr size_t dataSize = 512;
int in1[dataSize], in2[dataSize], in3[dataSize], tmp[dataSize], out[dataSize];
for (size_t i = 0; i < dataSize; ++i) {
in1[i] = i * 2;
in2[i] = i * 3;
in3[i] = i * 4;
tmp[i] = -1;
out[i] = -1;
}
queue q{ext::codeplay::experimental::property::queue::enable_fusion{}};
{
buffer<int> bIn1{in1, range{dataSize}};
buffer<int> bIn2{in2, range{dataSize}};
buffer<int> bIn3{in3, range{dataSize}};
buffer<int> bTmp{tmp, range{dataSize}};
buffer<int> bOut{out, range{dataSize}};
ext::codeplay::experimental::fusion_wrapper fw{q};
fw.start_fusion();
assert(fw.is_in_fusion_mode() && "Queue should be in fusion mode");
q.submit([&](handler &cgh) {
auto accIn1 = bIn1.get_access(cgh);
auto accIn2 = bIn2.get_access(cgh);
auto accTmp = bTmp.get_access(
cgh, sycl::ext::codeplay::experimental::property::promote_local{});
cgh.parallel_for<class KernelOne>(
nd_range<1>{{dataSize}, {16}},
[=](id<1> i) { accTmp[i] = accIn1[i] + accIn2[i]; });
});
q.submit([&](handler &cgh) {
auto accTmp = bTmp.get_access(
cgh, sycl::ext::codeplay::experimental::property::promote_local{});
auto accIn3 = bIn3.get_access(cgh);
auto accOut = bOut.get_access(cgh);
cgh.parallel_for<class KernelTwo>(
nd_range<1>{{dataSize}, {16}},
[=](id<1> i) { accOut[i] = accTmp[i] * accIn3[i]; });
});
fw.complete_fusion({ext::codeplay::experimental::property::no_barriers{}});
assert(!fw.is_in_fusion_mode() &&
"Queue should not be in fusion mode anymore");
}
// Check the results
for (size_t i = 0; i < dataSize; ++i) {
assert(out[i] == (20 * i * i) && "Computation error");
assert(tmp[i] == -1 && "Not internalized");
}
return 0;
}