Skip to content

Commit 5db0f72

Browse files
committed
add f16 and f16x2 to libclc with half vectors made of f16x2
1 parent 464e761 commit 5db0f72

File tree

3 files changed

+49
-23
lines changed

3 files changed

+49
-23
lines changed

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -782,7 +782,7 @@ void CodeGenModule::Release() {
782782
getTarget().getTargetOpts().NVVMCudaPrecSqrt);
783783
}
784784

785-
if ( LangOpts.isSYCL() && getTriple().isNVPTX()) {
785+
if (LangOpts.isSYCL() && getTriple().isNVPTX()) {
786786
getModule().addModuleFlag(llvm::Module::Override,
787787
"nvvm-reflect-approx-tanhf",
788788
getTarget().getTargetOpts().NVVMCudaApproxTanhf);

libclc/generic/include/clcmacro.h

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,7 @@
99
#ifndef __CLC_MACRO_H
1010
#define __CLC_MACRO_H
1111

12-
#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
13-
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
14-
return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
15-
} \
16-
\
12+
#define _CLC_UNARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
1713
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \
1814
return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \
1915
} \
@@ -30,12 +26,14 @@
3026
return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \
3127
}
3228

33-
#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
34-
ARG2_TYPE) \
35-
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \
36-
return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \
29+
#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
30+
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
31+
return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
3732
} \
38-
\
33+
_CLC_UNARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE)
34+
35+
#define _CLC_BINARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
36+
ARG2_TYPE) \
3937
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \
4038
return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \
4139
FUNCTION(x.z, y.z)); \

libclc/ptx-nvidiacl/libspirv/math/tanh.cl

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,45 @@
1111
#include "../../include/libdevice.h"
1212
#include <clcmacro.h>
1313

14-
int __clc_nvvm_reflect_arch();
15-
int __clc_nvvm_reflect_approx_tanh();
16-
17-
float __select_tanhf (float x) {
18-
if (__clc_nvvm_reflect_approx_tanh() && __clc_nvvm_reflect_arch() >= 800) {
19-
return __nvvm_tanh_approx_f(x);
20-
}
21-
return __nv_tanhf(x);
14+
extern int __clc_nvvm_reflect_arch();
15+
extern int __clc_nvvm_reflect_approx_tanh();
16+
17+
#define __USE_TANH_APPROX \
18+
(__clc_nvvm_reflect_approx_tanh() && (__clc_nvvm_reflect_arch() >= 750))
19+
20+
#ifdef cl_khr_fp64
21+
22+
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
23+
24+
_CLC_DEF _CLC_OVERLOAD double __spirv_ocl_tanh(double x) {
25+
return __nv_tanh(x);
2226
}
2327

24-
#define __CLC_FUNCTION __spirv_ocl_tanh
25-
#define __CLC_BUILTIN __nv_tanh
26-
#define __CLC_BUILTIN_F __select_tanhf
27-
#include <math/unary_builtin.inc>
28+
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_tanh, double)
29+
30+
#endif
31+
32+
_CLC_DEF _CLC_OVERLOAD float __spirv_ocl_tanh(float x) {
33+
return (__USE_TANH_APPROX) ? __nvvm_tanh_approx_f(x) : __nv_tanhf(x);
34+
}
35+
36+
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_tanh, float)
37+
38+
#ifdef cl_khr_fp16
39+
40+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
41+
42+
_CLC_DEF _CLC_OVERLOAD half __spirv_ocl_tanh(half x) {
43+
return (__USE_TANH_APPROX) ? __nvvm_tanh_approx_f16(x) : __nv_tanhf(x);
44+
}
45+
46+
_CLC_DEF _CLC_OVERLOAD half2 __spirv_ocl_tanh(half2 x) {
47+
return (__USE_TANH_APPROX) ? __nvvm_tanh_approx_f16x2(x)
48+
: (half2)(__nv_tanhf(x.x), __nv_tanhf(x.y));
49+
}
50+
51+
_CLC_UNARY_VECTORIZE_HAVE2(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_tanh, half)
52+
53+
#endif
54+
55+
#undef __USE_TANH_APPROX

0 commit comments

Comments
 (0)