Skip to content

Commit

Permalink
LoongArch: Provide fmin/fmax RTL pattern for vectors
Browse files Browse the repository at this point in the history
We already had smin/smax RTL patterns using the vfmin/vfmax instructions.
But for smin/smax, the result is unspecified if either operand is a
NaN.  So we would not vectorize the loop with -fno-finite-math-only
(the default for all optimization levels except -Ofast).

However, the LoongArch vfmin/vfmax instructions are IEEE-754-2008
conformant, so we can also use them for fmin/fmax and vectorize the loop.

gcc/ChangeLog:

	* config/loongarch/simd.md (fmax<mode>3): New define_insn.
	(fmin<mode>3): Likewise.
	(reduc_fmax_scal_<mode>): New define_expand.
	(reduc_fmin_scal_<mode>): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/vfmax-vfmin.c: New test.
  • Loading branch information
xry111 committed Jan 3, 2024
1 parent a43bd82 commit 87acfc3
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 0 deletions.
31 changes: 31 additions & 0 deletions gcc/config/loongarch/simd.md
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,37 @@
[(set_attr "type" "simd_fcmp")
(set_attr "mode" "<MODE>")])

; [x]vf{min/max} instructions are IEEE-754-2008 conforming, use them for
; the corresponding IEEE-754-2008 operations. We must use UNSPEC instead
; of smin/smax though, see PR105414 and PR107013.

; Int iterator over the two unspec codes, so that each template using
; UNSPEC_FMAXMIN below is instantiated once for max and once for min.
(define_int_iterator UNSPEC_FMAXMIN [UNSPEC_FMAX UNSPEC_FMIN])
; Maps each unspec code to the "fmax"/"fmin" stem that is spliced into
; pattern names and the output mnemonic via <fmaxmin>.
(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax") (UNSPEC_FMIN "fmin")])

; Floating-point max/min on the FVEC vector modes, expressed as an
; unspec of the two inputs.  The int iterator instantiates this template
; once per unspec code, yielding the fmax<mode>3 and fmin<mode>3 patterns.
; Operand 0 is the destination, operands 1 and 2 are the inputs; all
; three are register operands with the "f" constraint.
; NOTE(review): <x>, <simdfmt> and <wu> are attributes defined outside
; this excerpt; the accompanying test expects mnemonics such as vfmin.d
; and xvfmax.s -- confirm against their definitions.
(define_insn "<fmaxmin><mode>3"
[(set (match_operand:FVEC 0 "register_operand" "=f")
(unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")
(match_operand:FVEC 2 "register_operand" "f")]
UNSPEC_FMAXMIN))]
""
"<x>v<fmaxmin>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
[(set_attr "type" "simd_fminmax")
(set_attr "mode" "<MODE>")])

;; Scalar fmax/fmin reductions over an FVEC vector: operand 1 is the
;; input vector and operand 0 receives the <UNITMODE> scalar result.
;; NOTE(review): the (const_int UNSPEC_FMAXMIN) element is presumably
;; present only so that the int iterator applies to this expander --
;; confirm.
(define_expand "reduc_<fmaxmin>_scal_<mode>"
  [(match_operand:<UNITMODE> 0 "register_operand")
   (match_operand:FVEC 1 "register_operand")
   (const_int UNSPEC_FMAXMIN)]
  ""
{
  /* Fresh vector pseudo that receives the output of the reduction
     helper.  */
  rtx reduced = gen_reg_rtx (<MODE>mode);

  /* The helper is handed the generator of the matching fmax/fmin insn
     to combine elements with.  */
  loongarch_expand_vector_reduc (gen_<fmaxmin><mode>3, reduced,
				 operands[1]);

  /* Move element 0 of the helper's output into the scalar
     destination.  */
  rtx extract = gen_vec_extract<mode><unitmode> (operands[0], reduced,
						 const0_rtx);
  emit_insn (extract);
  DONE;
})

; The LoongArch SX Instructions.
(include "lsx.md")

Expand Down
31 changes: 31 additions & 0 deletions gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mtune=la464 -mlasx" } */
/* { dg-final { scan-assembler "\tvfmin\\.d" } } */
/* { dg-final { scan-assembler "\tvfmax\\.d" } } */
/* { dg-final { scan-assembler "\txvfmin\\.d" } } */
/* { dg-final { scan-assembler "\txvfmax\\.d" } } */
/* { dg-final { scan-assembler "\tvfmin\\.s" } } */
/* { dg-final { scan-assembler "\tvfmax\\.s" } } */
/* { dg-final { scan-assembler "\txvfmin\\.s" } } */
/* { dg-final { scan-assembler "\txvfmax\\.s" } } */

/* T(OP) is the type of a call to __builtin_OP, obtained with __typeof__
   so each test uses the element type of the builtin it exercises.  */
#define T(OP) __typeof__ (__builtin_##OP (0, 0))

/* Define test_<OP><LEN>: store __builtin_OP (src1[i], src2[i]) into
   dest[i] for LEN / sizeof (T(OP)) elements, i.e. LEN bytes worth of
   data.  All three pointers are restrict-qualified, so the arrays are
   declared not to alias.  */
#define TEST(OP, LEN) \
void \
test_##OP##LEN (T (OP) *restrict dest, \
const T (OP) *restrict src1, \
const T (OP) *restrict src2) \
{ \
for (int i = 0; i < LEN / sizeof (T(OP)); i++) \
dest[i] = __builtin_##OP (src1[i], src2[i]); \
}

/* Instantiate the loop for fmin/fmax and fminf/fmaxf at LEN 16 and
   LEN 32.  The dg-final directives at the top of this file scan for
   the vf* and xvf* forms with both .d and .s suffixes.
   NOTE(review): presumably LEN 16 corresponds to the vf* (LSX) forms
   and LEN 32 to the xvf* (LASX) forms -- confirm.  */
TEST(fmin, 16)
TEST(fmax, 16)
TEST(fmin, 32)
TEST(fmax, 32)
TEST(fminf, 16)
TEST(fmaxf, 16)
TEST(fminf, 32)
TEST(fmaxf, 32)

0 comments on commit 87acfc3

Please sign in to comment.