Skip to content

Commit

Permalink
[libclc] Reland 7e6a739 (#16606)
Browse files Browse the repository at this point in the history
This upstream libclc commit was dropped during a pulldown. This relands
it, adapting it to our downstream libclc framework. Most of the upstream
commit was to the OpenCL builtins, which we don't directly make use of
for DPC++.

----

libclc: increase fp16 support (#98149)
Increase fp16 support to allow clspv to continue to be OpenCL compliant
following the update of the OpenCL-CTS adding more testing on math
functions and conversions with half.

Math functions are implemented by upscaling to fp32 and using the fp32
implementation. It guarantees the accuracy required for half-precision
floating-point by the CTS.
  • Loading branch information
frasercrmck authored Jan 14, 2025
1 parent 44c58bb commit c9b9b56
Show file tree
Hide file tree
Showing 46 changed files with 445 additions and 45 deletions.
2 changes: 2 additions & 0 deletions libclc/clc/include/clc/clcmacro.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,8 @@
} \
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half, half)

#pragma OPENCL EXTENSION cl_khr_fp16 : disable

#else

#define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION)
Expand Down
11 changes: 11 additions & 0 deletions libclc/clspv/lib/math/fma.cl
Original file line number Diff line number Diff line change
Expand Up @@ -269,3 +269,14 @@ _CLC_DEF _CLC_OVERLOAD float fma(float a, float b, float c) {
((uint)st_fma.mantissa.lo & 0x7fffff));
}
_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, fma, float, float, float)

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// Half-precision overload of fma: casts all three operands to float,
// evaluates them with mad(), and casts the result back to half.
_CLC_DEF _CLC_OVERLOAD half fma(half a, half b, half c) {
return (half)mad((float)a, (float)b, (float)c);
}
_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, fma, half, half, half)

#endif
42 changes: 24 additions & 18 deletions libclc/generic/gen_convert_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,25 +47,31 @@
'half' : 2, 'float' : 4,
'double': 8}

limit_max = {'char' : 'CHAR_MAX',
'schar' : 'CHAR_MAX',
'uchar' : 'UCHAR_MAX',
'short' : 'SHRT_MAX',
'ushort': 'USHRT_MAX',
'int' : 'INT_MAX',
'uint' : 'UINT_MAX',
'long' : 'LONG_MAX',
'ulong' : 'ULONG_MAX'}
limit_max = {
"char": "CHAR_MAX",
"schar": "CHAR_MAX",
"uchar": "UCHAR_MAX",
"short": "SHRT_MAX",
"ushort": "USHRT_MAX",
"int": "INT_MAX",
"uint": "UINT_MAX",
"long": "LONG_MAX",
"ulong": "ULONG_MAX",
"half": "0x1.ffcp+15",
}

limit_min = {'char' : 'CHAR_MIN',
'schar' : 'CHAR_MIN',
'uchar' : '0',
'short' : 'SHRT_MIN',
'ushort': '0',
'int' : 'INT_MIN',
'uint' : '0',
'long' : 'LONG_MIN',
'ulong' : '0'}
limit_min = {
"char": "CHAR_MIN",
"schar": "CHAR_MIN",
"uchar": "0",
"short": "SHRT_MIN",
"ushort": "0",
"int": "INT_MIN",
"uint": "0",
"long": "LONG_MIN",
"ulong": "0",
"half": "-0x1.ffcp+15",
}


def conditional_guard(src, dst):
Expand Down
23 changes: 20 additions & 3 deletions libclc/generic/include/clc/convert.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,19 @@
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ulong, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, float, SUFFIX)

#ifdef cl_khr_fp64
#if defined(cl_khr_fp64) && defined(cl_khr_fp16)
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX)
#elif defined(cl_khr_fp64)
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX)
#elif defined(cl_khr_fp16)
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX)
#else
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX)
Expand All @@ -43,11 +52,19 @@
_CLC_VECTOR_CONVERT_FROM(ulong, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(float, SUFFIX)

#ifdef cl_khr_fp64
#if defined(cl_khr_fp64) && defined(cl_khr_fp16)
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
_CLC_VECTOR_CONVERT_TO1(SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(double, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(half, SUFFIX)
#elif defined(cl_khr_fp64)
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
_CLC_VECTOR_CONVERT_TO1(SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(double, SUFFIX)
#else
#elif defined(cl_khr_fp16)
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
_CLC_VECTOR_CONVERT_TO1(SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(half, SUFFIX)
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
_CLC_VECTOR_CONVERT_TO1(SUFFIX)
#endif
Expand Down
22 changes: 19 additions & 3 deletions libclc/generic/lib/gen_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,21 +65,21 @@
"uint",
"long",
"ulong",
"half",
"float",
"double",
]
int_types = ["char", "uchar", "short", "ushort", "int", "uint", "long", "ulong"]
unsigned_types = ["uchar", "ushort", "uint", "ulong"]
float_types = ["float", "double"]
float_types = ["half", "float", "double"]
int64_types = ["long", "ulong"]
float64_types = ["double"]
float16_types = ["half"]
vector_sizes = ["", "2", "3", "4", "8", "16"]
half_sizes = [("2", ""), ("4", "2"), ("8", "4"), ("16", "8")]

saturation = ["", "_sat"]
rounding_modes = ["_rtz", "_rte", "_rtp", "_rtn"]
float_prefix = {"float": "FLT_", "double": "DBL_"}
float_suffix = {"float": "f", "double": ""}

bool_type = {
"char": "char",
Expand All @@ -90,6 +90,7 @@
"uint": "int",
"long": "long",
"ulong": "long",
"half": "short",
"float": "int",
"double": "long",
}
Expand All @@ -114,6 +115,7 @@
"uint": 4,
"long": 8,
"ulong": 8,
"half": 2,
"float": 4,
"double": 8,
}
Expand All @@ -127,6 +129,7 @@
"uint": "UINT_MAX",
"long": "LONG_MAX",
"ulong": "ULONG_MAX",
"half": "0x1.ffcp+15",
}

limit_min = {
Expand All @@ -138,24 +141,33 @@
"uint": "0",
"long": "LONG_MIN",
"ulong": "0",
"half": "-0x1.ffcp+15",
}


def conditional_guard(src, dst):
int64_count = 0
float64_count = 0
float16_count = 0
if src in int64_types:
int64_count = int64_count + 1
elif src in float64_types:
float64_count = float64_count + 1
elif src in float16_types:
float16_count = float16_count + 1
if dst in int64_types:
int64_count = int64_count + 1
elif dst in float64_types:
float64_count = float64_count + 1
elif dst in float16_types:
float16_count = float16_count + 1
if float64_count > 0:
# In embedded profile, if cl_khr_fp64 is supported, cles_khr_int64 has to be supported too.
print("#ifdef cl_khr_fp64")
return True
elif float16_count > 0:
print("#if defined cl_khr_fp16")
return True
elif int64_count > 0:
print("#if defined cles_khr_int64 || !defined(__EMBEDDED_PROFILE__)")
return True
Expand Down Expand Up @@ -198,6 +210,10 @@ def conditional_guard(src, dst):
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#endif
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#endif
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
Expand Down
8 changes: 8 additions & 0 deletions libclc/generic/lib/math/acos.cl
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,11 @@ _CLC_OVERLOAD _CLC_DEF double acos(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acos, double);

#endif // cl_khr_fp64

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

_CLC_DEFINE_UNARY_BUILTIN_FP16(acos)

#endif
8 changes: 8 additions & 0 deletions libclc/generic/lib/math/acosh.cl
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,11 @@ _CLC_OVERLOAD _CLC_DEF double acosh(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acosh, double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

_CLC_DEFINE_UNARY_BUILTIN_FP16(acosh)

#endif
8 changes: 8 additions & 0 deletions libclc/generic/lib/math/acospi.cl
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,11 @@ _CLC_OVERLOAD _CLC_DEF double acospi(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acospi, double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

_CLC_DEFINE_UNARY_BUILTIN_FP16(acospi)

#endif
8 changes: 8 additions & 0 deletions libclc/generic/lib/math/asinh.cl
Original file line number Diff line number Diff line change
Expand Up @@ -291,3 +291,11 @@ _CLC_OVERLOAD _CLC_DEF double asinh(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, asinh, double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

_CLC_DEFINE_UNARY_BUILTIN_FP16(asinh)

#endif
9 changes: 9 additions & 0 deletions libclc/generic/lib/math/atan.cl
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,12 @@ _CLC_OVERLOAD _CLC_DEF double atan(double x)
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan, double);

#endif // cl_khr_fp64


#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

_CLC_DEFINE_UNARY_BUILTIN_FP16(atan)

#endif
8 changes: 8 additions & 0 deletions libclc/generic/lib/math/atan2.cl
Original file line number Diff line number Diff line change
Expand Up @@ -235,3 +235,11 @@ _CLC_OVERLOAD _CLC_DEF double atan2(double y, double x)
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2, double, double);

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

_CLC_DEFINE_BINARY_BUILTIN_FP16(atan2)

#endif
8 changes: 8 additions & 0 deletions libclc/generic/lib/math/atan2pi.cl
Original file line number Diff line number Diff line change
Expand Up @@ -219,3 +219,11 @@ _CLC_OVERLOAD _CLC_DEF double atan2pi(double y, double x) {
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2pi, double, double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

_CLC_DEFINE_BINARY_BUILTIN_FP16(atan2pi)

#endif
8 changes: 8 additions & 0 deletions libclc/generic/lib/math/atanh.cl
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,11 @@ _CLC_OVERLOAD _CLC_DEF double atanh(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atanh, double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

_CLC_DEFINE_UNARY_BUILTIN_FP16(atanh)

#endif
8 changes: 8 additions & 0 deletions libclc/generic/lib/math/atanpi.cl
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,11 @@ _CLC_OVERLOAD _CLC_DEF double atanpi(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atanpi, double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

_CLC_DEFINE_UNARY_BUILTIN_FP16(atanpi)

#endif
8 changes: 8 additions & 0 deletions libclc/generic/lib/math/cbrt.cl
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,11 @@ _CLC_OVERLOAD _CLC_DEF double cbrt(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cbrt, double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

_CLC_DEFINE_UNARY_BUILTIN_FP16(cbrt)

#endif
12 changes: 12 additions & 0 deletions libclc/generic/lib/math/clc_rootn.cl
Original file line number Diff line number Diff line change
Expand Up @@ -369,3 +369,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny)
}
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_rootn, double, int)
#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// Half-precision overload of __clc_rootn: casts x to float, calls
// __clc_rootn on the float value with the same integer y, and casts the
// result back to half.
_CLC_OVERLOAD _CLC_DEF half __clc_rootn(half x, int y) {
return (half)__clc_rootn((float)x, y);
}

_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_rootn, half, int);

#endif
16 changes: 15 additions & 1 deletion libclc/generic/lib/math/clc_sw_binary.inc
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,25 @@

#define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x)

// TODO: Enable half precision when the sw routine is implemented
// Binary builtin body, selected by the floating-point width of __CLC_GENTYPE.
// Types wider than 16 bits forward straight to the software routine named
// __clc_<__CLC_FUNC>.
#if __CLC_FPSIZE > 16
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, __CLC_GENTYPE y) {
return __CLC_SW_FUNC(__CLC_FUNC)(x, y);
}
// 16-bit (half) types: convert both operands to float, call the software
// routine on the float values, then convert the result back to half.
#elif __CLC_FPSIZE == 16
#ifdef __CLC_SCALAR
// Scalar case: uses the unsuffixed convert_float / convert_half.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x,
__CLC_GENTYPE y) {
return convert_half(
__CLC_SW_FUNC(__CLC_FUNC)(convert_float(x), convert_float(y)));
}
#else
// Vector case: the conversion names are built by appending __CLC_VECSIZE
// to convert_float / convert_half.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x,
__CLC_GENTYPE y) {
return __CLC_XCONCAT(convert_half, __CLC_VECSIZE)(__CLC_SW_FUNC(__CLC_FUNC)(
__CLC_XCONCAT(convert_float, __CLC_VECSIZE)(x),
__CLC_XCONCAT(convert_float, __CLC_VECSIZE)(y)));
}
#endif
#endif

#undef __CLC_SW_FUNC
12 changes: 11 additions & 1 deletion libclc/generic/lib/math/clc_sw_unary.inc
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,19 @@
#define __CLC_SW_FUNC __CLC_XCONCAT(__clc_, __CLC_FUNC)
#endif

// TODO: Enable half precision when the sw routine is implemented
// Unary builtin body, selected by the floating-point width of __CLC_GENTYPE.
// Types wider than 16 bits forward straight to __CLC_SW_FUNC.
#if __CLC_FPSIZE > 16
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) {
return __CLC_SW_FUNC(x);
}
// 16-bit (half) types: convert the operand to float, call __CLC_SW_FUNC on
// the float value, then convert the result back to half.
#elif __CLC_FPSIZE == 16
#ifdef __CLC_SCALAR
// Scalar case: uses the unsuffixed convert_float / convert_half.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) {
return convert_half(__CLC_SW_FUNC(convert_float(x)));
}
#else
// Vector case: the conversion names are built by appending __CLC_VECSIZE
// to convert_float / convert_half.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) {
return __CLC_XCONCAT(convert_half, __CLC_VECSIZE)(
__CLC_SW_FUNC(__CLC_XCONCAT(convert_float, __CLC_VECSIZE)(x)));
}
#endif
#endif
Loading

0 comments on commit c9b9b56

Please sign in to comment.