Skip to content

Commit

Permalink
amdgcn: Enable SIMD vectorization of math functions
Browse files Browse the repository at this point in the history
Calls to vectorized versions of routines in the math library will now
be inserted when vectorizing code containing supported math functions.

2023-03-02  Kwok Cheung Yeung  <[email protected]>
	    Paul-Antoine Arras  <[email protected]>

	gcc/
	* builtins.cc (mathfn_built_in_explicit): New.
	* config/gcn/gcn.cc: Include case-cfn-macros.h.
	(mathfn_built_in_explicit): Add prototype.
	(gcn_vectorize_builtin_vectorized_function): New.
	(gcn_libc_has_function): New.
	(TARGET_LIBC_HAS_FUNCTION): Define.
	(TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Define.

	gcc/testsuite/
	* gcc.target/gcn/simd-math-1.c: New testcase.
	* gcc.target/gcn/simd-math-2.c: New testcase.

	libgomp/
	* testsuite/libgomp.c/simd-math-1.c: New testcase.
  • Loading branch information
k-yeung committed Mar 2, 2023
1 parent 5425159 commit ce9cd72
Show file tree
Hide file tree
Showing 5 changed files with 549 additions and 0 deletions.
8 changes: 8 additions & 0 deletions gcc/builtins.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2089,6 +2089,14 @@ mathfn_built_in (tree type, combined_fn fn)
return mathfn_built_in_1 (type, fn, /*implicit=*/ 1);
}

/* Look up the math built-in for combined function FN on TYPE exactly as
   mathfn_built_in_1 does, but always request the explicit array rather
   than the implicit one.  */

tree
mathfn_built_in_explicit (tree type, combined_fn fn)
{
  /* Zero selects the explicit array in mathfn_built_in_1.  */
  const int use_implicit_array = 0;

  return mathfn_built_in_1 (type, fn, use_implicit_array);
}

/* Like mathfn_built_in_1, but take a built_in_function and
always use the implicit array. */

Expand Down
110 changes: 110 additions & 0 deletions gcc/config/gcn/gcn.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
#include "dwarf2.h"
#include "gimple.h"
#include "cgraph.h"
#include "case-cfn-macros.h"

/* This file should be included last. */
#include "target-def.h"
Expand Down Expand Up @@ -5240,6 +5241,110 @@ gcn_simd_clone_usable (struct cgraph_node *ARG_UNUSED (node))
return 0;
}

/* Local prototype for the explicit-array built-in lookup (defined in
   builtins.cc); it is called by gcn_vectorize_builtin_vectorized_function
   for the routines that are not looked up through mathfn_built_in.  */
tree mathfn_built_in_explicit (tree, combined_fn);

/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION.
   Return the function declaration of the vectorized version of the builtin
   in the math library if available.

   FN is the combined_fn code of the scalar routine, TYPE_OUT the vector
   type of the result and TYPE_IN the vector type of the argument(s).

   NULL_TREE is returned when either type is not a vector type, when FN is
   not one of the routines listed below, when no scalar built-in
   declaration is found for FN, or when the library symbol name does not
   fit in the local name buffer.  */

tree
gcn_vectorize_builtin_vectorized_function (unsigned int fn, tree type_out,
                                           tree type_in)
{
  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  machine_mode out_mode = TYPE_MODE (TREE_TYPE (type_out));
  int out_n = TYPE_VECTOR_SUBPARTS (type_out);
  combined_fn cfn = combined_fn (fn);

  /* Keep this consistent with the list of vectorized math routines.  */
  int implicit_p;
  switch (fn)
    {
    CASE_CFN_ACOS:
    CASE_CFN_ACOSH:
    CASE_CFN_ASIN:
    CASE_CFN_ASINH:
    CASE_CFN_ATAN:
    CASE_CFN_ATAN2:
    CASE_CFN_ATANH:
    CASE_CFN_COPYSIGN:
    CASE_CFN_COS:
    CASE_CFN_COSH:
    CASE_CFN_ERF:
    CASE_CFN_EXP:
    CASE_CFN_EXP2:
    CASE_CFN_FINITE:
    CASE_CFN_FMOD:
    CASE_CFN_GAMMA:
    CASE_CFN_HYPOT:
    CASE_CFN_ISNAN:
    CASE_CFN_LGAMMA:
    CASE_CFN_LOG:
    CASE_CFN_LOG10:
    CASE_CFN_LOG2:
    CASE_CFN_POW:
    CASE_CFN_REMAINDER:
    CASE_CFN_RINT:
    CASE_CFN_SIN:
    CASE_CFN_SINH:
    CASE_CFN_SQRT:
    CASE_CFN_TAN:
    CASE_CFN_TANH:
    CASE_CFN_TGAMMA:
      implicit_p = 1;
      break;

    /* These two are looked up through the explicit built-in array.  */
    CASE_CFN_SCALB:
    CASE_CFN_SIGNIFICAND:
      implicit_p = 0;
      break;

    default:
      return NULL_TREE;
    }

  /* Any element mode other than DFmode is treated as single precision.  */
  tree out_t_node = (out_mode == DFmode) ? double_type_node : float_type_node;
  tree fndecl = implicit_p ? mathfn_built_in (out_t_node, cfn)
                           : mathfn_built_in_explicit (out_t_node, cfn);

  /* Without a scalar declaration there is no name to derive the vector
     routine from, so decline instead of dereferencing a null decl.  */
  if (fndecl == NULL_TREE)
    return NULL_TREE;

  /* Build "v<N>df_<name>" or "v<N>sf_<name>".  The scalar name is taken
     from the built-in's DECL_NAME with its leading prefix dropped; this
     assumes that name starts with "__builtin_".  */
  const size_t prefix_len = sizeof ("__builtin_") - 1;
  const char *bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  char name[20];
  int len = snprintf (name, sizeof (name),
                      out_mode == DFmode ? "v%ddf_%s" : "v%dsf_%s",
                      out_n, bname + prefix_len);

  /* A truncated name would refer to the wrong symbol; give up rather than
     overflow the buffer or emit a bogus call.  */
  if (len < 0 || (size_t) len >= sizeof (name))
    return NULL_TREE;

  /* Count the parameters recorded on the scalar declaration.
     NOTE(review): confirm that built-in decls actually carry
     DECL_ARGUMENTS; if they do not, ARITY is 0 and the two-argument
     signature below is always the one that gets built.  */
  unsigned arity = 0;
  for (tree args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  tree fntype = (arity == 1)
                ? build_function_type_list (type_out, type_in, NULL_TREE)
                : build_function_type_list (type_out, type_in, type_in,
                                            NULL_TREE);

  /* Build a function declaration for the vectorized function.  */
  tree new_fndecl = build_decl (BUILTINS_LOCATION,
                                FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}

/* Implement TARGET_LIBC_HAS_FUNCTION.
   The query for FN_CLASS and TYPE is answered entirely by
   bsd_libc_has_function.  */

bool
gcn_libc_has_function (enum function_class fn_class,
                       tree type)
{
  bool available = bsd_libc_has_function (fn_class, type);

  return available;
}

/* }}} */
/* {{{ md_reorg pass. */

Expand Down Expand Up @@ -7290,6 +7395,8 @@ gcn_dwarf_register_span (rtx rtl)
gcn_ira_change_pseudo_allocno_class
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P gcn_legitimate_constant_p
#undef TARGET_LIBC_HAS_FUNCTION
#define TARGET_LIBC_HAS_FUNCTION gcn_libc_has_function
#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true
#undef TARGET_MACHINE_DEPENDENT_REORG
Expand Down Expand Up @@ -7337,6 +7444,9 @@ gcn_dwarf_register_span (rtx rtl)
#define TARGET_TRULY_NOOP_TRUNCATION gcn_truly_noop_truncation
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST gcn_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
gcn_vectorize_builtin_vectorized_function
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
Expand Down
206 changes: 206 additions & 0 deletions gcc/testsuite/gcc.target/gcn/simd-math-1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
/* Check that calls to the vectorized math functions are actually emitted. */

/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fno-math-errno -mstack-size=3000000 -fdump-tree-vect" } */


/* Test configuration.
   PRINT_RESULT: when defined, PRINTF maps to the real printf; it is
   undefined here, so PRINTF maps to a function that prints nothing.
   VERBOSE: when non-zero, the check functions report every element rather
   than only the failing ones.
   EARLY_EXIT: when non-zero, the check functions call exit (1) on the
   first failing element.  */
#undef PRINT_RESULT
#define VERBOSE 0
#define EARLY_EXIT 1

#include <math.h>
#include <stdlib.h>

#ifdef PRINT_RESULT
#include <stdio.h>
#define PRINTF printf
#else
/* Variadic stand-in for printf that discards its arguments.  */
static void null_printf (const char *f, ...) { }

#define PRINTF null_printf
#endif

/* Number of elements in every input and result array.  */
#define N 512
/* Absolute differences above these thresholds are candidates for failure
   (a failure additionally requires a bit deviation greater than 10).  */
#define EPSILON_float 1e-5
#define EPSILON_double 1e-10

/* Value returned by main.  */
static int failed = 0;

/* Compare the bit patterns of X and Y and return the 1-based position
   (counted from the least significant bit) of the most significant bit in
   which they differ: 32 when the top bits differ, 0 when the two patterns
   are identical.  */
int deviation_float (float x, float y)
{
  union {
    float f;
    unsigned u;
  } lhs, rhs;

  lhs.f = x;
  rhs.f = y;

  unsigned differing = lhs.u ^ rhs.u;
  int position = 32;

  /* Walk a single-bit probe down from the top bit until it hits a
     differing bit or every position has been examined.  */
  for (unsigned probe = 0x80000000U;
       position > 0 && !(differing & probe);
       probe >>= 1)
    position--;

  return position;
}

/* Compare the bit patterns of X and Y and return the 1-based position
   (counted from the least significant bit) of the most significant bit in
   which they differ: 64 when the top bits differ, 0 when the two patterns
   are identical.  */
int deviation_double (double x, double y)
{
  union {
    double d;
    unsigned long long u;
  } lhs, rhs;

  lhs.d = x;
  rhs.d = y;

  unsigned long long differing = lhs.u ^ rhs.u;
  int position = 64;

  /* Walk a single-bit probe down from the top bit until it hits a
     differing bit or every position has been examined.  */
  for (unsigned long long probe = 0x8000000000000000ULL;
       position > 0 && !(differing & probe);
       probe >>= 1)
    position--;

  return position;
}

/* Define and immediately run a test of the one-argument math function FUN
   on element type TFLOAT (float or double).

   test_FUN fills A with N evenly spaced values starting at LOW with step
   (HIGH - LOW) / N, stores FUN (a[i]) into RES, and hands both arrays to
   check_FUN.  check_FUN is compiled with tree vectorization and unsafe
   math optimizations disabled; it recomputes FUN (a[i]) and flags an
   element as failing when the NaN-ness or infinity-ness differs, or when
   the absolute difference exceeds EPSILON_<TFLOAT> and the bit deviation
   exceeds 10.

   Failures are accumulated in the file-scope FAILED (no local of the same
   name may shadow it, or main's return value would never reflect them).
   The step is computed in double so that integer LOW/HIGH arguments do
   not degrade to integer division.

   The expansion defines functions and ends with a call, so it must be
   used inside a function body (GNU C nested functions).  */
#define TEST_FUN(TFLOAT, LOW, HIGH, FUN) \
  __attribute__((optimize("no-tree-vectorize"))) \
  __attribute__((optimize("no-unsafe-math-optimizations"))) \
  void check_##FUN (TFLOAT res[N], TFLOAT a[N]) \
  { \
    for (int i = 0; i < N; i++) { \
      TFLOAT expected = FUN (a[i]); \
      TFLOAT diff = __builtin_fabs (expected - res[i]); \
      int deviation = deviation_##TFLOAT (expected, res[i]); \
      int fail = isnan (res[i]) != isnan (expected) \
		 || isinf (res[i]) != isinf (expected) \
		 || (diff > EPSILON_##TFLOAT && deviation > 10); \
      failed |= fail; \
      if (VERBOSE || fail) \
	PRINTF (#FUN "(%f) = %f, expected = %f, diff = %f, deviation = %d %s\n", \
		a[i], res[i], expected, diff, deviation, fail ? "(!)" : ""); \
      if (EARLY_EXIT && fail) \
	exit (1); \
    } \
  } \
  void test_##FUN (void) \
  { \
    TFLOAT res[N], a[N]; \
    for (int i = 0; i < N; i++) \
      a[i] = (LOW) + (((double) (HIGH) - (LOW)) / N) * i; \
    for (int i = 0; i < N; i++) \
      res[i] = FUN (a[i]); \
    check_##FUN (res, a); \
  }\
  test_##FUN ();

/* Define and immediately run a test of the two-argument math function FUN
   on element type TFLOAT (float or double).

   test_FUN fills A with N evenly spaced values starting at LOW1 with step
   (HIGH1 - LOW1) / N and B likewise from LOW2/HIGH2, stores
   FUN (a[i], b[i]) into RES, and hands the arrays to check_FUN.  check_FUN
   is compiled with tree vectorization and unsafe math optimizations
   disabled; it recomputes FUN (a[i], b[i]) and flags an element as failing
   when the NaN-ness or infinity-ness differs, or when the absolute
   difference exceeds EPSILON_<TFLOAT> and the bit deviation exceeds 10.

   Failures are accumulated in the file-scope FAILED (no local of the same
   name may shadow it, or main's return value would never reflect them).
   The steps are computed in double so that integer range arguments do not
   degrade to integer division.

   The expansion defines functions and ends with a call, so it must be
   used inside a function body (GNU C nested functions).  */
#define TEST_FUN2(TFLOAT, LOW1, HIGH1, LOW2, HIGH2, FUN) \
  __attribute__((optimize("no-tree-vectorize"))) \
  __attribute__((optimize("no-unsafe-math-optimizations"))) \
  void check_##FUN (TFLOAT res[N], TFLOAT a[N], TFLOAT b[N]) \
  { \
    for (int i = 0; i < N; i++) { \
      TFLOAT expected = FUN (a[i], b[i]); \
      TFLOAT diff = __builtin_fabs (expected - res[i]); \
      int deviation = deviation_##TFLOAT (expected, res[i]); \
      int fail = isnan (res[i]) != isnan (expected) \
		 || isinf (res[i]) != isinf (expected) \
		 || (diff > EPSILON_##TFLOAT && deviation > 10); \
      failed |= fail; \
      if (VERBOSE || fail) \
	PRINTF (#FUN "(%f,%f) = %f, expected = %f, diff = %f, deviation = %d %s\n", \
		a[i], b[i], res[i], expected, diff, deviation, fail ? "(!)" : ""); \
      if (EARLY_EXIT && fail) \
	exit (1); \
    } \
  } \
  void test_##FUN (void) \
  { \
    TFLOAT res[N], a[N], b[N]; \
    for (int i = 0; i < N; i++) { \
      a[i] = (LOW1) + (((double) (HIGH1) - (LOW1)) / N) * i; \
      b[i] = (LOW2) + (((double) (HIGH2) - (LOW2)) / N) * i; \
    } \
    for (int i = 0; i < N; i++) \
      res[i] = FUN (a[i], b[i]); \
    check_##FUN (res, a, b); \
  }\
  test_##FUN ();

/* Run every tested math routine over its input range: the float variants
   first, then the double variants.  Each TEST_FUN/TEST_FUN2 line defines
   and calls the test for one routine, and its trailing dg-final directive
   scans the "vect" dump for the matching v64sf_ or v64df_ routine name;
   directives marked xfail are expected not to find it.  Returns the
   file-scope failure flag.  */
int main (void)
{
TEST_FUN (float, -1.1, 1.1, acosf); /* { dg-final { scan-tree-dump "v64sf_acosf" "vect" } }*/
TEST_FUN (float, -10, 10, acoshf); /* { dg-final { scan-tree-dump "v64sf_acoshf" "vect" } }*/
TEST_FUN (float, -1.1, 1.1, asinf); /* { dg-final { scan-tree-dump "v64sf_asinf" "vect" } }*/
TEST_FUN (float, -10, 10, asinhf); /* { dg-final { scan-tree-dump "v64sf_asinhf" "vect" } }*/
TEST_FUN (float, -1.1, 1.1, atanf); /* { dg-final { scan-tree-dump "v64sf_atanf" "vect" } }*/
TEST_FUN2 (float, -2.0, 2.0, 2.0, -2.0, atan2f); /* { dg-final { scan-tree-dump "v64sf_atan2f" "vect" } }*/
TEST_FUN (float, -2.0, 2.0, atanhf); /* { dg-final { scan-tree-dump "v64sf_atanhf" "vect" } }*/
TEST_FUN2 (float, -10.0, 10.0, 5.0, -15.0, copysignf); /* { dg-final { scan-tree-dump "v64sf_copysignf" "vect" } }*/
TEST_FUN (float, -3.14159265359, 3.14159265359, cosf); /* { dg-final { scan-tree-dump "v64sf_cosf" "vect" } }*/
TEST_FUN (float, -3.14159265359, 3.14159265359, coshf); /* { dg-final { scan-tree-dump "v64sf_coshf" "vect" } }*/
TEST_FUN (float, -10.0, 10.0, erff); /* { dg-final { scan-tree-dump "v64sf_erff" "vect" } }*/
TEST_FUN (float, -10.0, 10.0, expf); /* { dg-final { scan-tree-dump "v64sf_expf" "vect" } }*/
TEST_FUN (float, -10.0, 10.0, exp2f); /* { dg-final { scan-tree-dump "v64sf_exp2f" "vect" } }*/
TEST_FUN2 (float, -10.0, 10.0, 100.0, -25.0, fmodf); /* { dg-final { scan-tree-dump "v64sf_fmodf" "vect" } }*/
TEST_FUN (float, -10.0, 10.0, gammaf); /* { dg-final { scan-tree-dump "v64sf_gammaf" "vect" { xfail *-*-*} } }*/
TEST_FUN2 (float, -10.0, 10.0, 15.0, -5.0,hypotf); /* { dg-final { scan-tree-dump "v64sf_hypotf" "vect" } }*/
TEST_FUN (float, -10.0, 10.0, lgammaf); /* { dg-final { scan-tree-dump "v64sf_lgammaf" "vect" { xfail *-*-*} } }*/
TEST_FUN (float, -1.0, 50.0, logf); /* { dg-final { scan-tree-dump "v64sf_logf" "vect" } }*/
TEST_FUN (float, -1.0, 500.0, log10f); /* { dg-final { scan-tree-dump "v64sf_log10f" "vect" } }*/
TEST_FUN (float, -1.0, 64.0, log2f); /* { dg-final { scan-tree-dump "v64sf_log2f" "vect" } }*/
TEST_FUN2 (float, -100.0, 100.0, 100.0, -100.0, powf); /* { dg-final { scan-tree-dump "v64sf_powf" "vect" } }*/
TEST_FUN2 (float, -50.0, 100.0, -2.0, 40.0, remainderf); /* { dg-final { scan-tree-dump "v64sf_remainderf" "vect" } }*/
TEST_FUN (float, -50.0, 50.0, rintf); /* { dg-final { scan-tree-dump "v64sf_rintf" "vect" } }*/
TEST_FUN2 (float, -50.0, 50.0, -10.0, 32.0, __builtin_scalbf); /* { dg-final { scan-tree-dump "v64sf_scalbf" "vect" } }*/
TEST_FUN (float, -10.0, 10.0, __builtin_significandf); /* { dg-final { scan-tree-dump "v64sf_significandf" "vect" } }*/
TEST_FUN (float, -3.14159265359, 3.14159265359, sinf); /* { dg-final { scan-tree-dump "v64sf_sinf" "vect" } }*/
TEST_FUN (float, -3.14159265359, 3.14159265359, sinhf); /* { dg-final { scan-tree-dump "v64sf_sinhf" "vect" } }*/
TEST_FUN (float, -0.1, 10000.0, sqrtf); /* { dg-final { scan-tree-dump "v64sf_sqrtf" "vect" } }*/
TEST_FUN (float, -5.0, 5.0, tanf); /* { dg-final { scan-tree-dump "v64sf_tanf" "vect" } }*/
TEST_FUN (float, -3.14159265359, 3.14159265359, tanhf); /* { dg-final { scan-tree-dump "v64sf_tanhf" "vect" } }*/
TEST_FUN (float, -10.0, 10.0, tgammaf); /* { dg-final { scan-tree-dump "v64sf_tgammaf" "vect" } }*/

TEST_FUN (double, -1.1, 1.1, acos); /* { dg-final { scan-tree-dump "v64df_acos" "vect" } }*/
TEST_FUN (double, -10, 10, acosh); /* { dg-final { scan-tree-dump "v64df_acosh" "vect" } }*/
TEST_FUN (double, -1.1, 1.1, asin); /* { dg-final { scan-tree-dump "v64df_asin" "vect" } }*/
TEST_FUN (double, -10, 10, asinh); /* { dg-final { scan-tree-dump "v64df_asinh" "vect" } }*/
TEST_FUN (double, -1.1, 1.1, atan); /* { dg-final { scan-tree-dump "v64df_atan" "vect" } }*/
TEST_FUN2 (double, -2.0, 2.0, 2.0, -2.0, atan2); /* { dg-final { scan-tree-dump "v64df_atan2" "vect" } }*/
TEST_FUN (double, -2.0, 2.0, atanh); /* { dg-final { scan-tree-dump "v64df_atanh" "vect" } }*/
TEST_FUN2 (double, -10.0, 10.0, 5.0, -15.0, copysign); /* { dg-final { scan-tree-dump "v64df_copysign" "vect" } }*/
TEST_FUN (double, -3.14159265359, 3.14159265359, cos); /* { dg-final { scan-tree-dump "v64df_cos" "vect" } }*/
TEST_FUN (double, -3.14159265359, 3.14159265359, cosh); /* { dg-final { scan-tree-dump "v64df_cosh" "vect" } }*/
TEST_FUN (double, -10.0, 10.0, erf); /* { dg-final { scan-tree-dump "v64df_erf" "vect" } }*/
TEST_FUN (double, -10.0, 10.0, exp); /* { dg-final { scan-tree-dump "v64df_exp" "vect" } }*/
TEST_FUN (double, -10.0, 10.0, exp2); /* { dg-final { scan-tree-dump "v64df_exp2" "vect" } }*/
TEST_FUN2 (double, -10.0, 10.0, 100.0, -25.0, fmod); /* { dg-final { scan-tree-dump "v64df_fmod" "vect" } }*/
TEST_FUN (double, -10.0, 10.0, gamma); /* { dg-final { scan-tree-dump "v64df_gamma" "vect" { xfail *-*-*} } }*/
TEST_FUN2 (double, -10.0, 10.0, 15.0, -5.0, hypot); /* { dg-final { scan-tree-dump "v64df_hypot" "vect" } }*/
TEST_FUN (double, -10.0, 10.0, lgamma); /* { dg-final { scan-tree-dump "v64df_lgamma" "vect" { xfail *-*-*} } }*/
TEST_FUN (double, -1.0, 50.0, log); /* { dg-final { scan-tree-dump "v64df_log" "vect" } }*/
TEST_FUN (double, -1.0, 500.0, log10); /* { dg-final { scan-tree-dump "v64df_log10" "vect" } }*/
TEST_FUN (double, -1.0, 64.0, log2); /* { dg-final { scan-tree-dump "v64df_log2" "vect" { xfail *-*-*} } }*/
TEST_FUN2 (double, -100.0, 100.0, 100.0, -100.0, pow); /* { dg-final { scan-tree-dump "v64df_pow" "vect" } }*/
TEST_FUN2 (double, -50.0, 100.0, -2.0, 40.0, remainder); /* { dg-final { scan-tree-dump "v64df_remainder" "vect" } }*/
TEST_FUN (double, -50.0, 50.0, rint); /* { dg-final { scan-tree-dump "v64df_rint" "vect" } }*/
TEST_FUN2 (double, -50.0, 50.0, -10.0, 32.0, __builtin_scalb); /* { dg-final { scan-tree-dump "v64df_scalb" "vect" } }*/
TEST_FUN (double, -10.0, 10.0, __builtin_significand); /* { dg-final { scan-tree-dump "v64df_significand" "vect" } }*/
TEST_FUN (double, -3.14159265359, 3.14159265359, sin); /* { dg-final { scan-tree-dump "v64df_sin" "vect" } }*/
TEST_FUN (double, -3.14159265359, 3.14159265359, sinh); /* { dg-final { scan-tree-dump "v64df_sinh" "vect" } }*/
TEST_FUN (double, -0.1, 10000.0, sqrt); /* { dg-final { scan-tree-dump "v64df_sqrt" "vect" } }*/
TEST_FUN (double, -5.0, 5.0, tan); /* { dg-final { scan-tree-dump "v64df_tan" "vect" } }*/
TEST_FUN (double, -3.14159265359, 3.14159265359, tanh); /* { dg-final { scan-tree-dump "v64df_tanh" "vect" } }*/
TEST_FUN (double, -10.0, 10.0, tgamma); /* { dg-final { scan-tree-dump "v64df_tgamma" "vect" } }*/

return failed;
}
8 changes: 8 additions & 0 deletions gcc/testsuite/gcc.target/gcn/simd-math-2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/* Check that the SIMD versions of math routines give the same (or
sufficiently close) results as their scalar equivalents. */

/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -fno-math-errno" } */
/* { dg-set-target-env-var "GCN_STACK_SIZE" "3000000" } */

#include "simd-math-1.c"
Loading

0 comments on commit ce9cd72

Please sign in to comment.