Skip to content

Commit 5703d85

Browse files
authored
[WebAssembly] Add intrinsics to wasm_simd128.h for all FP16 instructions (llvm#106465)
Getting this to work required a few additional changes: (1) add builtins for any instructions that can't currently be expressed in plain C; (2) add support for the saturating version of fp_to_<s,u>_I16x8 (other vector sizes supported this already); (3) support bitcasting f16x8 to v128, which is needed to return a __f16x8 as a v128_t.
1 parent 206b5af commit 5703d85

File tree

7 files changed

+348
-13
lines changed

7 files changed

+348
-13
lines changed

clang/include/clang/Basic/BuiltinsWebAssembly.def

+9
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ TARGET_BUILTIN(__builtin_wasm_bitmask_i16x8, "UiV8s", "nc", "simd128")
124124
TARGET_BUILTIN(__builtin_wasm_bitmask_i32x4, "UiV4i", "nc", "simd128")
125125
TARGET_BUILTIN(__builtin_wasm_bitmask_i64x2, "UiV2LLi", "nc", "simd128")
126126

127+
TARGET_BUILTIN(__builtin_wasm_abs_f16x8, "V8hV8h", "nc", "fp16")
127128
TARGET_BUILTIN(__builtin_wasm_abs_f32x4, "V4fV4f", "nc", "simd128")
128129
TARGET_BUILTIN(__builtin_wasm_abs_f64x2, "V2dV2d", "nc", "simd128")
129130

@@ -140,6 +141,10 @@ TARGET_BUILTIN(__builtin_wasm_max_f16x8, "V8hV8hV8h", "nc", "fp16")
140141
TARGET_BUILTIN(__builtin_wasm_pmin_f16x8, "V8hV8hV8h", "nc", "fp16")
141142
TARGET_BUILTIN(__builtin_wasm_pmax_f16x8, "V8hV8hV8h", "nc", "fp16")
142143

144+
TARGET_BUILTIN(__builtin_wasm_ceil_f16x8, "V8hV8h", "nc", "fp16")
145+
TARGET_BUILTIN(__builtin_wasm_floor_f16x8, "V8hV8h", "nc", "fp16")
146+
TARGET_BUILTIN(__builtin_wasm_trunc_f16x8, "V8hV8h", "nc", "fp16")
147+
TARGET_BUILTIN(__builtin_wasm_nearest_f16x8, "V8hV8h", "nc", "fp16")
143148
TARGET_BUILTIN(__builtin_wasm_ceil_f32x4, "V4fV4f", "nc", "simd128")
144149
TARGET_BUILTIN(__builtin_wasm_floor_f32x4, "V4fV4f", "nc", "simd128")
145150
TARGET_BUILTIN(__builtin_wasm_trunc_f32x4, "V4fV4f", "nc", "simd128")
@@ -151,9 +156,13 @@ TARGET_BUILTIN(__builtin_wasm_nearest_f64x2, "V2dV2d", "nc", "simd128")
151156

152157
TARGET_BUILTIN(__builtin_wasm_dot_s_i32x4_i16x8, "V4iV8sV8s", "nc", "simd128")
153158

159+
TARGET_BUILTIN(__builtin_wasm_sqrt_f16x8, "V8hV8h", "nc", "fp16")
154160
TARGET_BUILTIN(__builtin_wasm_sqrt_f32x4, "V4fV4f", "nc", "simd128")
155161
TARGET_BUILTIN(__builtin_wasm_sqrt_f64x2, "V2dV2d", "nc", "simd128")
156162

163+
// Saturating f16x8 -> i16x8 truncation. The argument is a half-precision
// vector ("V8h"), so these are gated on "fp16" like every other f16x8 builtin
// rather than on plain "simd128".
TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i16x8_f16x8, "V8sV8h", "nc", "fp16")
164+
TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i16x8_f16x8, "V8sV8h", "nc", "fp16")
165+
157166
TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i32x4_f32x4, "V4iV4f", "nc", "simd128")
158167
TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i32x4_f32x4, "V4iV4f", "nc", "simd128")
159168

clang/lib/CodeGen/CGBuiltin.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -21211,6 +21211,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
2121121211
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
2121221212
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
2121321213
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
21214+
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i16x8_f16x8:
2121421215
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
2121521216
Value *Src = EmitScalarExpr(E->getArg(0));
2121621217
llvm::Type *ResT = ConvertType(E->getType());
@@ -21222,6 +21223,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
2122221223
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
2122321224
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
2122421225
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
21226+
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i16x8_f16x8:
2122521227
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
2122621228
Value *Src = EmitScalarExpr(E->getArg(0));
2122721229
llvm::Type *ResT = ConvertType(E->getType());
@@ -21269,6 +21271,10 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
2126921271
CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
2127021272
return Builder.CreateCall(Callee, {LHS, RHS});
2127121273
}
21274+
case WebAssembly::BI__builtin_wasm_ceil_f16x8:
21275+
case WebAssembly::BI__builtin_wasm_floor_f16x8:
21276+
case WebAssembly::BI__builtin_wasm_trunc_f16x8:
21277+
case WebAssembly::BI__builtin_wasm_nearest_f16x8:
2127221278
case WebAssembly::BI__builtin_wasm_ceil_f32x4:
2127321279
case WebAssembly::BI__builtin_wasm_floor_f32x4:
2127421280
case WebAssembly::BI__builtin_wasm_trunc_f32x4:
@@ -21279,18 +21285,22 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
2127921285
case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
2128021286
unsigned IntNo;
2128121287
switch (BuiltinID) {
21288+
case WebAssembly::BI__builtin_wasm_ceil_f16x8:
2128221289
case WebAssembly::BI__builtin_wasm_ceil_f32x4:
2128321290
case WebAssembly::BI__builtin_wasm_ceil_f64x2:
2128421291
IntNo = Intrinsic::ceil;
2128521292
break;
21293+
case WebAssembly::BI__builtin_wasm_floor_f16x8:
2128621294
case WebAssembly::BI__builtin_wasm_floor_f32x4:
2128721295
case WebAssembly::BI__builtin_wasm_floor_f64x2:
2128821296
IntNo = Intrinsic::floor;
2128921297
break;
21298+
case WebAssembly::BI__builtin_wasm_trunc_f16x8:
2129021299
case WebAssembly::BI__builtin_wasm_trunc_f32x4:
2129121300
case WebAssembly::BI__builtin_wasm_trunc_f64x2:
2129221301
IntNo = Intrinsic::trunc;
2129321302
break;
21303+
case WebAssembly::BI__builtin_wasm_nearest_f16x8:
2129421304
case WebAssembly::BI__builtin_wasm_nearest_f32x4:
2129521305
case WebAssembly::BI__builtin_wasm_nearest_f64x2:
2129621306
IntNo = Intrinsic::nearbyint;
@@ -21489,12 +21499,14 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
2148921499
CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
2149021500
return Builder.CreateCall(Callee, {Vec});
2149121501
}
21502+
case WebAssembly::BI__builtin_wasm_abs_f16x8:
2149221503
case WebAssembly::BI__builtin_wasm_abs_f32x4:
2149321504
case WebAssembly::BI__builtin_wasm_abs_f64x2: {
2149421505
Value *Vec = EmitScalarExpr(E->getArg(0));
2149521506
Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
2149621507
return Builder.CreateCall(Callee, {Vec});
2149721508
}
21509+
case WebAssembly::BI__builtin_wasm_sqrt_f16x8:
2149821510
case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
2149921511
case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
2150021512
Value *Vec = EmitScalarExpr(E->getArg(0));

clang/lib/Headers/wasm_simd128.h

+147
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ typedef unsigned long long __u64x2
3333
__attribute__((__vector_size__(16), __aligned__(16)));
3434
typedef float __f32x4 __attribute__((__vector_size__(16), __aligned__(16)));
3535
typedef double __f64x2 __attribute__((__vector_size__(16), __aligned__(16)));
36+
typedef __fp16 __f16x8 __attribute__((__vector_size__(16), __aligned__(16)));
3637

3738
typedef signed char __i8x8 __attribute__((__vector_size__(8), __aligned__(8)));
3839
typedef unsigned char __u8x8
@@ -1878,6 +1879,152 @@ wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t __a, v128_t __b, v128_t __c) {
18781879
(__i8x16)__a, (__i8x16)__b, (__i32x4)__c);
18791880
}
18801881

1882+
// FP16 intrinsics
1883+
// Attribute set placed on each FP16 intrinsic wrapper: always inlined, no
// debug info, compiled with the "fp16" target feature enabled, and a minimum
// vector width of 128 bits.
#define __FP16_FN_ATTRS \
1884+
__attribute__((__always_inline__, __nodebug__, __target__("fp16"), \
1885+
__min_vector_width__(128)))
1886+
1887+
// Forwards the scalar __a to __builtin_wasm_splat_f16x8 and returns the
// resulting vector reinterpreted as v128_t.
// NOTE(review): presumably __a is narrowed to half precision and broadcast to
// all eight lanes -- the builtin is defined outside this view; confirm.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_splat(float __a) {
1888+
return (v128_t)__builtin_wasm_splat_f16x8(__a);
1889+
}
1890+
1891+
// Reinterprets __a as __f16x8 and returns, as a float, whatever
// __builtin_wasm_extract_lane_f16x8 yields for lane index __i.
// NOTE(review): __REQUIRE_CONSTANT is defined outside this view; presumably it
// forces __i to be a compile-time constant -- confirm.
static __inline__ float __FP16_FN_ATTRS wasm_f16x8_extract_lane(v128_t __a,
1892+
int __i)
1893+
__REQUIRE_CONSTANT(__i) {
1894+
return __builtin_wasm_extract_lane_f16x8((__f16x8)__a, __i);
1895+
}
1896+
1897+
// Reinterprets __a as __f16x8, passes it with lane index __i and scalar __b to
// __builtin_wasm_replace_lane_f16x8, and returns the result as v128_t.
// NOTE(review): presumably lane __i is overwritten with __b narrowed to half
// precision and __i must be a compile-time constant -- both depend on
// definitions outside this view; confirm.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_replace_lane(v128_t __a,
1898+
int __i,
1899+
float __b)
1900+
__REQUIRE_CONSTANT(__i) {
1901+
return (v128_t)__builtin_wasm_replace_lane_f16x8((__f16x8)__a, __i, __b);
1902+
}
1903+
1904+
// Absolute value of __a viewed as a half-precision vector, via
// __builtin_wasm_abs_f16x8 (emitted by CodeGen as a fabs call on the vector
// type). The result is reinterpreted as v128_t.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_abs(v128_t __a) {
1905+
return (v128_t)__builtin_wasm_abs_f16x8((__f16x8)__a);
1906+
}
1907+
1908+
// Lane-wise negation: applies unary minus to __a viewed as __f16x8 and
// reinterprets the result as v128_t. No builtin is needed for this one.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_neg(v128_t __a) {
1909+
return (v128_t)(-(__f16x8)__a);
1910+
}
1911+
1912+
// Passes __a, viewed as __f16x8, to __builtin_wasm_sqrt_f16x8 and returns the
// result reinterpreted as v128_t.
// NOTE(review): presumably a lane-wise square root, going by the builtin's
// name -- its lowering is not fully visible here; confirm.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_sqrt(v128_t __a) {
1913+
return (v128_t)__builtin_wasm_sqrt_f16x8((__f16x8)__a);
1914+
}
1915+
1916+
// Rounds __a, viewed as __f16x8, through __builtin_wasm_ceil_f16x8 (which
// CodeGen maps to the ceil intrinsic) and returns the result as v128_t.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ceil(v128_t __a) {
1917+
return (v128_t)__builtin_wasm_ceil_f16x8((__f16x8)__a);
1918+
}
1919+
1920+
// Rounds __a, viewed as __f16x8, through __builtin_wasm_floor_f16x8 (which
// CodeGen maps to the floor intrinsic) and returns the result as v128_t.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_floor(v128_t __a) {
1921+
return (v128_t)__builtin_wasm_floor_f16x8((__f16x8)__a);
1922+
}
1923+
1924+
// Rounds __a, viewed as __f16x8, through __builtin_wasm_trunc_f16x8 (which
// CodeGen maps to the trunc intrinsic) and returns the result as v128_t.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_trunc(v128_t __a) {
1925+
return (v128_t)__builtin_wasm_trunc_f16x8((__f16x8)__a);
1926+
}
1927+
1928+
// Rounds __a, viewed as __f16x8, through __builtin_wasm_nearest_f16x8 (which
// CodeGen maps to the nearbyint intrinsic) and returns the result as v128_t.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_nearest(v128_t __a) {
1929+
return (v128_t)__builtin_wasm_nearest_f16x8((__f16x8)__a);
1930+
}
1931+
1932+
// Lane-wise `==` on __a and __b viewed as __f16x8. The vector comparison
// result (a per-lane mask under the vector-extension rules) is reinterpreted
// as v128_t.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_eq(v128_t __a, v128_t __b) {
1933+
return (v128_t)((__f16x8)__a == (__f16x8)__b);
1934+
}
1935+
1936+
// Lane-wise `!=` on __a and __b viewed as __f16x8. The vector comparison
// result (a per-lane mask under the vector-extension rules) is reinterpreted
// as v128_t.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ne(v128_t __a, v128_t __b) {
1937+
return (v128_t)((__f16x8)__a != (__f16x8)__b);
1938+
}
1939+
1940+
// Lane-wise `<` on __a and __b viewed as __f16x8. The vector comparison
// result (a per-lane mask under the vector-extension rules) is reinterpreted
// as v128_t.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_lt(v128_t __a, v128_t __b) {
1941+
return (v128_t)((__f16x8)__a < (__f16x8)__b);
1942+
}
1943+
1944+
// Lane-wise `>` on __a and __b viewed as __f16x8. The vector comparison
// result (a per-lane mask under the vector-extension rules) is reinterpreted
// as v128_t.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_gt(v128_t __a, v128_t __b) {
1945+
return (v128_t)((__f16x8)__a > (__f16x8)__b);
1946+
}
1947+
1948+
// Lane-wise `<=` on __a and __b viewed as __f16x8. The vector comparison
// result (a per-lane mask under the vector-extension rules) is reinterpreted
// as v128_t.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_le(v128_t __a, v128_t __b) {
1949+
return (v128_t)((__f16x8)__a <= (__f16x8)__b);
1950+
}
1951+
1952+
// Lane-wise `>=` on __a and __b viewed as __f16x8. The vector comparison
// result (a per-lane mask under the vector-extension rules) is reinterpreted
// as v128_t.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ge(v128_t __a, v128_t __b) {
1953+
return (v128_t)((__f16x8)__a >= (__f16x8)__b);
1954+
}
1955+
1956+
// Lane-wise sum of __a and __b, both viewed as __f16x8, using the plain
// vector `+` operator. The result is reinterpreted as v128_t.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_add(v128_t __a,
1957+
v128_t __b) {
1958+
return (v128_t)((__f16x8)__a + (__f16x8)__b);
1959+
}
1960+
1961+
// Lane-wise difference __a - __b, both viewed as __f16x8, using the plain
// vector `-` operator. The result is reinterpreted as v128_t.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_sub(v128_t __a,
1962+
v128_t __b) {
1963+
return (v128_t)((__f16x8)__a - (__f16x8)__b);
1964+
}
1965+
1966+
// Lane-wise product of __a and __b, both viewed as __f16x8, using the plain
// vector `*` operator. The result is reinterpreted as v128_t.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_mul(v128_t __a,
1967+
v128_t __b) {
1968+
return (v128_t)((__f16x8)__a * (__f16x8)__b);
1969+
}
1970+
1971+
// Lane-wise quotient __a / __b, both viewed as __f16x8, using the plain
// vector `/` operator. The result is reinterpreted as v128_t.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_div(v128_t __a,
1972+
v128_t __b) {
1973+
return (v128_t)((__f16x8)__a / (__f16x8)__b);
1974+
}
1975+
1976+
// Passes __a and __b, viewed as __f16x8, to __builtin_wasm_min_f16x8 and
// returns the result reinterpreted as v128_t.
// NOTE(review): presumably a lane-wise minimum; NaN and signed-zero handling
// is decided by the builtin, which is not visible here -- confirm.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_min(v128_t __a,
1977+
v128_t __b) {
1978+
return (v128_t)__builtin_wasm_min_f16x8((__f16x8)__a, (__f16x8)__b);
1979+
}
1980+
1981+
// Passes __a and __b, viewed as __f16x8, to __builtin_wasm_max_f16x8 and
// returns the result reinterpreted as v128_t.
// NOTE(review): presumably a lane-wise maximum; NaN and signed-zero handling
// is decided by the builtin, which is not visible here -- confirm.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_max(v128_t __a,
1982+
v128_t __b) {
1983+
return (v128_t)__builtin_wasm_max_f16x8((__f16x8)__a, (__f16x8)__b);
1984+
}
1985+
1986+
// Passes __a and __b, viewed as __f16x8, to __builtin_wasm_pmin_f16x8 and
// returns the result reinterpreted as v128_t.
// NOTE(review): presumably the "pseudo-minimum" variant of min, going by the
// builtin's name -- its exact semantics are not visible here; confirm.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_pmin(v128_t __a,
1987+
v128_t __b) {
1988+
return (v128_t)__builtin_wasm_pmin_f16x8((__f16x8)__a, (__f16x8)__b);
1989+
}
1990+
1991+
// Passes __a and __b, viewed as __f16x8, to __builtin_wasm_pmax_f16x8 and
// returns the result reinterpreted as v128_t.
// NOTE(review): presumably the "pseudo-maximum" variant of max, going by the
// builtin's name -- its exact semantics are not visible here; confirm.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_pmax(v128_t __a,
1992+
v128_t __b) {
1993+
return (v128_t)__builtin_wasm_pmax_f16x8((__f16x8)__a, (__f16x8)__b);
1994+
}
1995+
1996+
// Converts __a, viewed as __f16x8, with the signed builtin
// __builtin_wasm_trunc_saturate_s_i16x8_f16x8 and returns the integer vector
// reinterpreted as v128_t.
// NOTE(review): presumably truncates toward zero and saturates out-of-range
// lanes to the signed 16-bit limits, going by the builtin's name -- confirm.
static __inline__ v128_t __FP16_FN_ATTRS
1997+
wasm_i16x8_trunc_sat_f16x8(v128_t __a) {
1998+
return (v128_t)__builtin_wasm_trunc_saturate_s_i16x8_f16x8((__f16x8)__a);
1999+
}
2000+
2001+
// Converts __a, viewed as __f16x8, with the unsigned builtin
// __builtin_wasm_trunc_saturate_u_i16x8_f16x8 and returns the integer vector
// reinterpreted as v128_t.
// NOTE(review): presumably truncates toward zero and saturates out-of-range
// lanes to the unsigned 16-bit limits, going by the builtin's name -- confirm.
static __inline__ v128_t __FP16_FN_ATTRS
2002+
wasm_u16x8_trunc_sat_f16x8(v128_t __a) {
2003+
return (v128_t)__builtin_wasm_trunc_saturate_u_i16x8_f16x8((__f16x8)__a);
2004+
}
2005+
2006+
// Element-wise conversion of __a, viewed as __i16x8, to __f16x8 using
// __builtin_convertvector. The result is reinterpreted as v128_t.
// NOTE(review): __i16x8 is declared outside this view; presumably eight signed
// 16-bit lanes -- confirm.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_i16x8(v128_t __a) {
2007+
return (v128_t) __builtin_convertvector((__i16x8)__a, __f16x8);
2008+
}
2009+
2010+
// Element-wise conversion of __a, viewed as __u16x8, to __f16x8 using
// __builtin_convertvector. The result is reinterpreted as v128_t.
// NOTE(review): __u16x8 is declared outside this view; presumably eight
// unsigned 16-bit lanes -- confirm.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_u16x8(v128_t __a) {
2011+
return (v128_t) __builtin_convertvector((__u16x8)__a, __f16x8);
2012+
}
2013+
2014+
// Passes __a, __b and __c, each viewed as __f16x8 and in that order, to
// __builtin_wasm_relaxed_madd_f16x8 and returns the result as v128_t.
// NOTE(review): presumably a relaxed multiply-add; which operands are
// multiplied and whether the operation is fused is decided by the builtin,
// which is not visible here -- confirm.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_madd(v128_t __a,
2015+
v128_t __b,
2016+
v128_t __c) {
2017+
return (v128_t)__builtin_wasm_relaxed_madd_f16x8((__f16x8)__a, (__f16x8)__b,
2018+
(__f16x8)__c);
2019+
}
2020+
2021+
// Passes __a, __b and __c, each viewed as __f16x8 and in that order, to
// __builtin_wasm_relaxed_nmadd_f16x8 and returns the result as v128_t.
// NOTE(review): presumably the negated-product form of relaxed multiply-add;
// operand roles and fusing are decided by the builtin, which is not visible
// here -- confirm.
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_nmadd(v128_t __a,
2022+
v128_t __b,
2023+
v128_t __c) {
2024+
return (v128_t)__builtin_wasm_relaxed_nmadd_f16x8((__f16x8)__a, (__f16x8)__b,
2025+
(__f16x8)__c);
2026+
}
2027+
18812028
// Deprecated intrinsics
18822029

18832030
static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i8x16_swizzle")

0 commit comments

Comments
 (0)