Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL] Tests for vec/marray math #1002

Merged
merged 25 commits into from
Dec 2, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
f95793b
tests for vec/marray math
JackAKirk Apr 21, 2022
c7e2ff8
Made template function improvement.
JackAKirk May 11, 2022
b589c32
format
JackAKirk May 11, 2022
ab479c8
split fp16 cases into new files.
JackAKirk Jun 16, 2022
9e2394e
fixed queue constructor mistake.
JackAKirk Jun 16, 2022
80e6994
format
JackAKirk Jun 16, 2022
1f0edf8
use fp16 aspect in half_builtins.cpp
JackAKirk Jun 16, 2022
499c642
removed unnecessary -fsycl-device-code-split=per_kernel.
JackAKirk Jun 16, 2022
86176da
Merge branch 'intel' into math_marray_tests
JackAKirk Jun 23, 2022
3d03969
write -> read (superficial change in context of test).
JackAKirk Jun 30, 2022
538c64b
Removed float3 powr tests.
JackAKirk Jun 30, 2022
e77ba69
Removed failing test coverage for existing float3 functions.
JackAKirk Jul 1, 2022
a3976b4
Merge branch 'intel' into math_marray_tests
JackAKirk Jul 12, 2022
4f2dfe5
Remove broken half/native cases.
JackAKirk Sep 6, 2022
29bd30b
Merge branch 'intel' into math_marray_tests
JackAKirk Sep 6, 2022
c1972f0
Removed unused cases.
JackAKirk Sep 7, 2022
68e768d
Merge branch 'math_marray_tests' of https://github.com/JackAKirk/llvm…
JackAKirk Sep 7, 2022
57f3329
Add marray -fast-math test cases.
JackAKirk Sep 12, 2022
a9efc86
Removed initially proposed native and half_prec tests.
JackAKirk Sep 13, 2022
f731c93
Added back device-code-split.
JackAKirk Sep 13, 2022
505064c
Remove host_runs.
JackAKirk Sep 14, 2022
cfc1e91
windows && level_zero marked unsupported.
Sep 22, 2022
ea6693d
Mark opencl && windows unsupported.
Sep 22, 2022
5f91765
Merge branch 'intel' into math_marray_tests
JackAKirk Dec 2, 2022
512a37a
Added fp64 aspect check.
Dec 2, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion SYCL/Basic/half_builtins.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
Expand Down Expand Up @@ -166,6 +166,13 @@ template <int N> bool check(vec<float, N> a, vec<float, N> b) {

int main() {
queue q;

if (!q.get_device().has(sycl::aspect::fp16)) {
std::cout << "skipping fp16 tests: requires fp16 device aspect."
<< std::endl;
return 0;
}

float16 a, b, c, d;
for (int i = 0; i < SZ_max; i++) {
a[i] = i / (float)SZ_max;
Expand Down
51 changes: 51 additions & 0 deletions SYCL/DeviceLib/built-ins/ext_native_math.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// tests oneapi extension native tanh math function for sycl::vec and
// sycl::marray float cases.

#include "ext_native_math_common.hpp"

int main() {

sycl::queue q;

const float tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0,
-1.7, 1.7, -1.2, 1.2, -3.0, 3.0, -10.0, 10.0};
const float tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, 0.75, -0.1,
-0.94, 0.92, -0.84, 0.82, -1.0, 0.98, -1.10, 0.98};
const float tu[16] = {-0.95, -0.89, -0.75, 0.1, 0.97, 0.91, 0.77, 0.1,
-0.92, 0.94, -0.82, 0.84, -0.98, 1.00, -0.98, 1.10};

native_tanh_tester<float>(q, tv[0], tl[0], tu[0]);
native_tanh_tester<sycl::float2>(q, {tv[0], tv[1]}, {tl[0], tl[1]},
{tu[0], tu[1]});
native_tanh_tester<sycl::float3>(
q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]});

native_tanh_tester<sycl::float4>(q, {tv[0], tv[1], tv[2], tv[3]},
{tl[0], tl[1], tl[2], tl[3]},
{tu[0], tu[1], tu[2], tu[3]});
native_tanh_tester<sycl::marray<float, 3>>(
q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]});
native_tanh_tester<sycl::marray<float, 4>>(q, {tv[0], tv[1], tv[2], tv[3]},
{tl[0], tl[1], tl[2], tl[3]},
{tu[0], tu[1], tu[2], tu[3]});
native_tanh_tester<sycl::float8>(
q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]},
{tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]},
{tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]});
native_tanh_tester<sycl::float16>(
q,
{tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9],
tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]},
{tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9],
tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]},
{tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9],
tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]});

return 0;
}
67 changes: 67 additions & 0 deletions SYCL/DeviceLib/built-ins/ext_native_math_common.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#include <cassert>
#include <sycl/sycl.hpp>

template <typename T, size_t N>
void assert_out_of_bound(sycl::marray<T, N> val, sycl::marray<T, N> lower,
sycl::marray<T, N> upper) {
for (int i = 0; i < N; i++) {
assert(lower[i] < val[i] && val[i] < upper[i]);
}
}

template <typename T> void assert_out_of_bound(T val, T lower, T upper) {
assert(sycl::all(lower < val && val < upper));
}

template <>
void assert_out_of_bound<float>(float val, float lower, float upper) {
assert(lower < val && val < upper);
}

template <>
void assert_out_of_bound<sycl::half>(sycl::half val, sycl::half lower,
sycl::half upper) {
assert(lower < val && val < upper);
}

template <typename T>
void native_tanh_tester(sycl::queue q, T val, T up, T lo) {
T r = val;

#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH
{
sycl::buffer<T, 1> BufR(&r, sycl::range<1>(1));
q.submit([&](sycl::handler &cgh) {
auto AccR = BufR.template get_access<sycl::access::mode::read_write>(cgh);
cgh.single_task([=]() {
AccR[0] = sycl::ext::oneapi::experimental::native::tanh(AccR[0]);
});
});
}

assert_out_of_bound(r, up, lo);
#else
assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported");
#endif
}

template <typename T>
void native_exp2_tester(sycl::queue q, T val, T up, T lo) {
T r = val;

#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH
{
sycl::buffer<T, 1> BufR(&r, sycl::range<1>(1));
q.submit([&](sycl::handler &cgh) {
auto AccR = BufR.template get_access<sycl::access::mode::read_write>(cgh);
cgh.single_task([=]() {
AccR[0] = sycl::ext::oneapi::experimental::native::exp2(AccR[0]);
});
});
}

assert_out_of_bound(r, up, lo);
#else
assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported");
#endif
}
94 changes: 94 additions & 0 deletions SYCL/DeviceLib/built-ins/ext_native_math_fp16.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// OpenCL CPU driver does not support cl_khr_fp16 extension for this reason this
// test is compiled with the -fsycl-device-code-split flag

// tests oneapi extension native math functions for sycl::vec and sycl::marray
// fp16 cases.

#include "ext_native_math_common.hpp"

int main() {

sycl::queue q;

if (!q.get_device().has(sycl::aspect::fp16)) {
std::cout << "skipping fp16 tests: requires fp16 device aspect."
<< std::endl;
return 0;
}

const sycl::half tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0,
-1.7, 1.7, -1.2, 1.2, -3.0, 3.0, -10.0, 10.0};
const sycl::half tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89,
0.75, -0.1, -0.94, 0.92, -0.84, 0.82,
-1.0, 0.98, -1.10, 0.98};
const sycl::half tu[16] = {-0.95, -0.89, -0.75, 0.1, 0.97, 0.91,
0.77, 0.1, -0.92, 0.94, -0.82, 0.84,
-0.98, 1.00, -0.98, 1.10};

native_tanh_tester<sycl::half>(q, tv[0], tl[0], tu[0]);
native_tanh_tester<sycl::half2>(q, {tv[0], tv[1]}, {tl[0], tl[1]},
{tu[0], tu[1]});
native_tanh_tester<sycl::half3>(q, {tv[0], tv[1], tv[2]},
{tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]});
native_tanh_tester<sycl::marray<sycl::half, 3>>(
q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]});
native_tanh_tester<sycl::half4>(q, {tv[0], tv[1], tv[2], tv[3]},
{tl[0], tl[1], tl[2], tl[3]},
{tu[0], tu[1], tu[2], tu[3]});
native_tanh_tester<sycl::marray<sycl::half, 4>>(
q, {tv[0], tv[1], tv[2], tv[3]}, {tl[0], tl[1], tl[2], tl[3]},
{tu[0], tu[1], tu[2], tu[3]});
native_tanh_tester<sycl::half8>(
q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]},
{tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]},
{tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]});
native_tanh_tester<sycl::half16>(
q,
{tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9],
tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]},
{tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9],
tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]},
{tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9],
tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]});

const sycl::half ev[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0,
-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0};
const sycl::half el[16] = {0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9,
0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9};
const sycl::half eu[16] = {0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1,
0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1};

native_exp2_tester<sycl::half>(q, ev[0], el[0], eu[0]);
native_exp2_tester<sycl::half2>(q, {ev[0], ev[1]}, {el[0], el[1]},
{eu[0], eu[1]});
native_exp2_tester<sycl::half3>(q, {ev[0], ev[1], ev[2]},
{el[0], el[1], el[2]}, {eu[0], eu[1], eu[2]});
native_exp2_tester<sycl::half4>(q, {ev[0], ev[1], ev[2], ev[3]},
{el[0], el[1], el[2], el[3]},
{eu[0], eu[1], eu[2], eu[3]});
native_exp2_tester<sycl::marray<sycl::half, 3>>(
q, {ev[0], ev[1], ev[2]}, {el[0], el[1], el[2]}, {eu[0], eu[1], eu[2]});
native_exp2_tester<sycl::marray<sycl::half, 4>>(
q, {ev[0], ev[1], ev[2], ev[3]}, {el[0], el[1], el[2], el[3]},
{eu[0], eu[1], eu[2], eu[3]});
native_exp2_tester<sycl::half8>(
q, {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7]},
{el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7]},
{eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7]});
native_exp2_tester<sycl::half16>(
q,
{ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7], ev[8], ev[9],
ev[10], ev[11], ev[12], ev[13], ev[14], ev[15]},
{el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7], el[8], el[9],
el[10], el[11], el[12], el[13], el[14], el[15]},
{eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7], eu[8], eu[9],
eu[10], eu[11], eu[12], eu[13], eu[14], eu[15]});

return 0;
}
159 changes: 159 additions & 0 deletions SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

#include <sycl/sycl.hpp>

using namespace sycl;

template <typename T1, typename T2> class TypeHelper;

template <typename T> bool checkEqual(vec<T, 3> A, size_t B) {
T TB = B;
return A.x() == TB && A.y() == TB && A.z() == TB;
}

template <typename T> bool checkEqual(vec<T, 4> A, size_t B) {
T TB = B;
return A.x() == TB && A.y() == TB && A.z() == TB && A.w() == TB;
}

template <typename T, size_t N> bool checkEqual(marray<T, N> A, size_t B) {
for (int i = 0; i < N; i++) {
if (A[i] != B) {
return false;
}
}
return true;
}

#define HALF_PRECISION_OPERATOR(NAME) \
template <typename T> \
void half_precision_math_test_##NAME(queue &deviceQueue, T result, T input, \
size_t ref) { \
{ \
buffer<T, 1> buffer1(&result, 1); \
buffer<T, 1> buffer2(&input, 1); \
deviceQueue.submit([&](handler &cgh) { \
accessor<T, 1, access::mode::write, target::device> res_access( \
buffer1, cgh); \
accessor<T, 1, access::mode::write, target::device> input_access( \
buffer2, cgh); \
cgh.single_task<TypeHelper<class half_precision##NAME, T>>([=]() { \
res_access[0] = sycl::half_precision::NAME(input_access[0]); \
}); \
}); \
} \
assert(checkEqual(result, ref)); \
}

HALF_PRECISION_OPERATOR(sin)
HALF_PRECISION_OPERATOR(tan)
HALF_PRECISION_OPERATOR(cos)
HALF_PRECISION_OPERATOR(exp)
HALF_PRECISION_OPERATOR(exp2)
HALF_PRECISION_OPERATOR(exp10)
HALF_PRECISION_OPERATOR(log)
HALF_PRECISION_OPERATOR(log2)
HALF_PRECISION_OPERATOR(log10)
HALF_PRECISION_OPERATOR(sqrt)
HALF_PRECISION_OPERATOR(rsqrt)
HALF_PRECISION_OPERATOR(recip)

#undef HALF_PRECISION_OPERATOR

#define HALF_PRECISION_OPERATOR_2(NAME) \
template <typename T> \
void half_precision_math_test_2_##NAME(queue &deviceQueue, T result, \
T input1, T input2, size_t ref) { \
{ \
buffer<T, 1> buffer1(&result, 1); \
buffer<T, 1> buffer2(&input1, 1); \
buffer<T, 1> buffer3(&input2, 1); \
deviceQueue.submit([&](handler &cgh) { \
accessor<T, 1, access::mode::write, target::device> res_access( \
buffer1, cgh); \
accessor<T, 1, access::mode::write, target::device> input1_access( \
buffer2, cgh); \
accessor<T, 1, access::mode::write, target::device> input2_access( \
buffer3, cgh); \
cgh.single_task<TypeHelper<class half_precision2##NAME, T>>([=]() { \
res_access[0] = \
sycl::half_precision::NAME(input1_access[0], input2_access[0]); \
}); \
}); \
} \
assert(checkEqual(result, ref)); \
}

HALF_PRECISION_OPERATOR_2(divide)
HALF_PRECISION_OPERATOR_2(powr)

#undef HALF_PRECISION_OPERATOR_2

template <typename T> void half_precision_math_tests_3(queue &deviceQueue) {
half_precision_math_test_sin(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0);
half_precision_math_test_tan(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 0);
half_precision_math_test_cos(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1);
half_precision_math_test_exp(deviceQueue, T{-1, -1, -1}, T{0, 0, 0}, 1);
half_precision_math_test_exp2(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 4);
half_precision_math_test_exp10(deviceQueue, T{-1, -1, -1}, T{2, 2, 2}, 100);
half_precision_math_test_log(deviceQueue, T{-1, -1, -1}, T{1, 1, 1}, 0);
half_precision_math_test_log2(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2);
half_precision_math_test_log10(deviceQueue, T{-1, -1, -1}, T{100, 100, 100},
2);
half_precision_math_test_sqrt(deviceQueue, T{-1, -1, -1}, T{4, 4, 4}, 2);
half_precision_math_test_rsqrt(deviceQueue, T{-1, -1, -1},
T{0.25, 0.25, 0.25}, 2);
half_precision_math_test_recip(deviceQueue, T{-1, -1, -1},
T{0.25, 0.25, 0.25}, 4);
half_precision_math_test_2_powr(deviceQueue, T{-1, -1, -1}, T{2, 2, 2},
T{2, 2, 2}, 4);
half_precision_math_test_2_divide(deviceQueue, T{-1, -1, -1}, T{4, 4, 4},
T{2, 2, 2}, 2);
}

template <typename T> void half_precision_math_tests_4(queue &deviceQueue) {
half_precision_math_test_sin(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0},
0);
half_precision_math_test_tan(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0},
0);
half_precision_math_test_cos(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0},
1);
half_precision_math_test_exp(deviceQueue, T{-1, -1, -1, -1}, T{0, 0, 0, 0},
1);
half_precision_math_test_exp2(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2},
4);
half_precision_math_test_exp10(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2},
100);
half_precision_math_test_log(deviceQueue, T{-1, -1, -1, -1}, T{1, 1, 1, 1},
0);
half_precision_math_test_log2(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4},
2);
half_precision_math_test_log10(deviceQueue, T{-1, -1, -1, -1},
T{100, 100, 100, 100}, 2);
half_precision_math_test_sqrt(deviceQueue, T{-1, -1, -1, -1}, T{4, 4, 4, 4},
2);
half_precision_math_test_rsqrt(deviceQueue, T{-1, -1, -1, -1},
T{0.25, 0.25, 0.25, 0.25}, 2);
half_precision_math_test_recip(deviceQueue, T{-1, -1, -1, -1},
T{0.25, 0.25, 0.25, 0.25}, 4);
half_precision_math_test_2_powr(deviceQueue, T{-1, -1, -1, -1}, T{2, 2, 2, 2},
T{2, 2, 2, 2}, 4);
half_precision_math_test_2_divide(deviceQueue, T{-1, -1, -1, -1},
T{4, 4, 4, 4}, T{2, 2, 2, 2}, 2);
}

int main() {
queue deviceQueue;

half_precision_math_tests_3<float3>(deviceQueue);
half_precision_math_tests_3<marray<float, 3>>(deviceQueue);

half_precision_math_tests_4<float4>(deviceQueue);
half_precision_math_tests_4<marray<float, 4>>(deviceQueue);
std::cout << "Pass" << std::endl;
return 0;
}
Loading