Skip to content

Commit 3a60c85

Browse files
authored
[SYCL][InvokeSIMD] Add basic numerics test for simd_mask (#8976)
Signed-off-by: Sarnie, Nick <[email protected]>
1 parent 39d1c65 commit 3a60c85

File tree

1 file changed

+104
-0
lines changed

1 file changed

+104
-0
lines changed
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
// TODO: enable on Windows once driver is ready
2+
// REQUIRES: gpu && linux
3+
// UNSUPPORTED: cuda || hip
4+
//
5+
// Check that full compilation works:
6+
// RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out
7+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
8+
#include <sycl/detail/boost/mp11.hpp>
9+
#include <sycl/ext/intel/esimd.hpp>
10+
#include <sycl/ext/oneapi/experimental/invoke_simd.hpp>
11+
#include <sycl/sycl.hpp>
12+
13+
#include <functional>
14+
#include <iostream>
15+
#include <type_traits>
16+
using namespace sycl;
17+
using namespace sycl::ext::oneapi::experimental;
18+
namespace esimd = sycl::ext::intel::esimd;
19+
constexpr int VL = 16;
20+
21+
[[intel::device_indirectly_callable]] simd<float, VL>
22+
SIMD_CALLEE(simd<float, VL> va, simd_mask<float, VL> mask) SYCL_ESIMD_FUNCTION {
23+
esimd::simd<float, VL> ret(0);
24+
esimd::simd_mask<VL> emask;
25+
for(int i = 0; i < VL; i++)
26+
emask[i] = static_cast<bool>(mask[i]);
27+
ret.merge(va, !emask);
28+
return ret;
29+
}
30+
31+
int main() {
32+
sycl::queue q;
33+
auto dev = q.get_device();
34+
35+
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
36+
<< "\n";
37+
constexpr unsigned Size = 1024;
38+
constexpr unsigned GroupSize = 4 * VL;
39+
40+
std::array<float, Size> A;
41+
std::array<float, Size> C;
42+
std::array<bool, Size> M;
43+
44+
for (unsigned i = 0; i < Size; ++i) {
45+
A[i] = i;
46+
C[i] = 0;
47+
M[i] = i % 2;
48+
}
49+
50+
sycl::buffer<float> ABuf(A);
51+
sycl::buffer<float> CBuf(C);
52+
sycl::buffer<bool> MBuf(M);
53+
54+
sycl::range<1> GlobalRange{Size};
55+
// Number of workitems in each workgroup.
56+
sycl::range<1> LocalRange{GroupSize};
57+
58+
sycl::nd_range<1> Range(GlobalRange, LocalRange);
59+
60+
try {
61+
auto e = q.submit([&](handler &cgh) {
62+
sycl::accessor A_acc{ABuf, cgh, sycl::read_only};
63+
sycl::accessor C_acc{CBuf, cgh, sycl::write_only};
64+
sycl::accessor M_acc{MBuf, cgh, sycl::read_only};
65+
cgh.parallel_for(Range, [=](nd_item<1> ndi) {
66+
sub_group sg = ndi.get_sub_group();
67+
uint32_t wi_id = ndi.get_global_linear_id();
68+
float res = invoke_simd(sg, SIMD_CALLEE, A_acc[wi_id], M_acc[wi_id]);
69+
C_acc[wi_id] = res;
70+
});
71+
});
72+
e.wait();
73+
} catch (sycl::exception const &e) {
74+
75+
std::cout << "SYCL exception caught: " << e.what() << '\n';
76+
return e.code().value();
77+
}
78+
79+
int err_cnt = 0;
80+
sycl::host_accessor A_acc(ABuf);
81+
sycl::host_accessor C_acc(CBuf);
82+
83+
for (unsigned i = 0; i < Size; ++i) {
84+
if ((i % 2 == 0) && A_acc[i] != C_acc[i]) {
85+
if (++err_cnt < 10) {
86+
std::cout << "failed at index " << i << ", " << C_acc[i]
87+
<< " != " << A_acc[i] << "\n";
88+
}
89+
}
90+
if ((i % 2 == 1) && C_acc[i] != 0.0f) {
91+
if (++err_cnt < 10) {
92+
std::cout << "failed at index " << i << ", " << C_acc[i] << " != 0\n";
93+
}
94+
}
95+
}
96+
if (err_cnt > 0) {
97+
std::cout << " pass rate: "
98+
<< ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% ("
99+
<< (Size - err_cnt) << "/" << Size << ")\n";
100+
}
101+
102+
std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n");
103+
return err_cnt == 0;
104+
}

0 commit comments

Comments
 (0)