Skip to content

Commit 648bd1f

Browse files
committed
Support vector float_extend from __bf16 to float.
The extension is implemented as a vector permutation with a zero vector. gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_expand_vector_bf2sf_with_vec_perm): New function. * config/i386/i386-protos.h (ix86_expand_vector_bf2sf_with_vec_perm): New declaration. * config/i386/mmx.md (extendv2bfv2sf2): New expander. * config/i386/sse.md (extend<sf_cvt_bf16_lower><mode>2): Ditto. (VF1_AVX512BW): New mode iterator. (sf_cvt_bf16): Add V4SF. (sf_cvt_bf16_lower): New mode attr. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512bw-extendbf2sf.c: New test. * gcc.target/i386/sse2-extendbf2sf.c: New test.
1 parent a17acf4 commit 648bd1f

File tree

6 files changed

+144
-1
lines changed

6 files changed

+144
-1
lines changed

gcc/config/i386/i386-expand.cc

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26879,5 +26879,44 @@ ix86_expand_vector_sf2bf_with_vec_perm (rtx dest, rtx src)
2687926879
emit_move_insn (dest, lowpart_subreg (GET_MODE (dest), target, vperm_mode));
2688026880
}
2688126881

26882+
/* Implement extendv8bf2v8sf2 with vector permutation. */
26883+
void
26884+
ix86_expand_vector_bf2sf_with_vec_perm (rtx dest, rtx src)
26885+
{
26886+
machine_mode vperm_mode, src_mode = GET_MODE (src);
26887+
switch (src_mode)
26888+
{
26889+
case V16BFmode:
26890+
vperm_mode = V32BFmode;
26891+
break;
26892+
case V8BFmode:
26893+
vperm_mode = V16BFmode;
26894+
break;
26895+
case V4BFmode:
26896+
vperm_mode = V8BFmode;
26897+
break;
26898+
default:
26899+
gcc_unreachable ();
26900+
}
26901+
26902+
int nelt = GET_MODE_NUNITS (vperm_mode);
26903+
vec_perm_builder sel (nelt, nelt, 1);
26904+
sel.quick_grow (nelt);
26905+
for (int i = 0, k = 0, j = nelt; i != nelt; i++)
26906+
sel[i] = i & 1 ? j++ : k++;
26907+
26908+
vec_perm_indices indices (sel, 2, nelt);
26909+
26910+
rtx target = gen_reg_rtx (vperm_mode);
26911+
rtx op1 = lowpart_subreg (vperm_mode,
26912+
force_reg (src_mode, src),
26913+
src_mode);
26914+
rtx op0 = CONST0_RTX (vperm_mode);
26915+
bool ok = targetm.vectorize.vec_perm_const (vperm_mode, vperm_mode,
26916+
target, op0, op1, indices);
26917+
gcc_assert (ok);
26918+
emit_move_insn (dest, lowpart_subreg (GET_MODE (dest), target, vperm_mode));
26919+
}
26920+
2688226921

2688326922
#include "gt-i386-expand.h"

gcc/config/i386/i386-protos.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,8 @@ extern bool ix86_ternlog_operand_p (rtx op);
259259
extern rtx ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2,
260260
int idx, rtx target);
261261
extern void ix86_expand_vector_sf2bf_with_vec_perm (rtx, rtx);
262+
/* Expand a vector __bf16 -> float extension (arguments: dest, src) using
   a vector permutation; defined in i386-expand.cc.  */
extern void ix86_expand_vector_bf2sf_with_vec_perm (rtx, rtx);
263+
262264

263265
#ifdef TREE_CODE
264266
extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);

gcc/config/i386/mmx.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3012,6 +3012,24 @@
30123012
DONE;
30133013
})
30143014

3015+
;; Extend V2BF to V2SF by going through the V4BF -> V4SF expander: the
;; input becomes the low part of a V4BF register and the low part of the
;; V4SF result is copied to the destination.
(define_expand "extendv2bfv2sf2"
  [(set (match_operand:V2SF 0 "register_operand")
        (float_extend:V2SF
          (match_operand:V2BF 1 "nonimmediate_operand")))]
  "TARGET_SSE2 && TARGET_MMX_WITH_SSE"
{
  rtx wide_dst = gen_reg_rtx (V4SFmode);
  rtx wide_src = gen_reg_rtx (V4BFmode);

  /* Put the two input elements into the low part of a V4BF register.  */
  rtx src_reg = force_reg (V2BFmode, operands[1]);
  emit_move_insn (wide_src, lowpart_subreg (V4BFmode, src_reg, V2BFmode));

  emit_insn (gen_extendv4bfv4sf2 (wide_dst, wide_src));

  /* Only the low two SF elements make up the result.  */
  rtx low_half = lowpart_subreg (V2SFmode, wide_dst, V4SFmode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
3032+
30153033
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
30163034
;;
30173035
;; Parallel integral arithmetic

gcc/config/i386/sse.md

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,9 @@
539539
(define_mode_iterator VF1_AVX512VL
540540
[(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
541541

542+
;; SF vector modes for which the BF -> SF extension is expanded as a word
;; permutation of a BF vector with twice as many elements.  The 512-bit
;; case is gated on AVX512BW in addition to EVEX512 (the condition used to
;; test TARGET_EVEX512 twice and never checked AVX512BW).
(define_mode_iterator VF1_AVX512BW
  [(V16SF "TARGET_AVX512BW && TARGET_EVEX512") (V8SF "TARGET_AVX2") V4SF])
544+
542545
(define_mode_iterator VF1_AVX10_2
543546
[(V16SF "TARGET_AVX10_2_512") V8SF V4SF])
544547

@@ -30957,7 +30960,11 @@
3095730960
[(V32BF "V16SF") (V16BF "V8SF") (V8BF "V4SF")])
3095830961
;; Converting from SF to BF
3095930962
(define_mode_attr sf_cvt_bf16
30960-
[(V8SF "V8BF") (V16SF "V16BF")])
30963+
[(V4SF "V4BF") (V8SF "V8BF") (V16SF "V16BF")])
30964+
30965+
;; Lower-case name of the BF vector mode with the same number of elements
;; as the given SF vector mode, for use in pattern names.
(define_mode_attr sf_cvt_bf16_lower
  [(V4SF "v4bf")
   (V8SF "v8bf")
   (V16SF "v16bf")])
30967+
3096130968
;; Mapping from BF to SF
3096230969
(define_mode_attr sf_bf16
3096330970
[(V4SF "V8BF") (V8SF "V16BF") (V16SF "V32BF")])
@@ -31116,6 +31123,17 @@
3111631123
}
3111731124
})
3111831125

31126+
;; Vector float_extend from BF to SF with an equal number of elements.
;; The whole expansion is delegated to
;; ix86_expand_vector_bf2sf_with_vec_perm.
(define_expand "extend<sf_cvt_bf16_lower><mode>2"
  [(set (match_operand:VF1_AVX512BW 0 "register_operand")
        (float_extend:VF1_AVX512BW
          (match_operand:<sf_cvt_bf16> 1 "nonimmediate_operand")))]
  "TARGET_SSE2"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  ix86_expand_vector_bf2sf_with_vec_perm (dest, src);
  DONE;
})
31135+
31136+
3111931137
(define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
3112031138
[(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
3112131139
(float_truncate:<sf_cvt_bf16>
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/* { dg-do compile } */
2+
/* { dg-options "-mavx512bw -mavx512vl -O2" } */
3+
/* { dg-final { scan-assembler-times {(?n)(?:vpermi2w|vpunpcklwd)} 6 } } */
4+
5+
typedef float v4sf __attribute__((vector_size(16)));
6+
typedef float v8sf __attribute__((vector_size(32)));
7+
typedef float v16sf __attribute__((vector_size(64)));
8+
typedef __bf16 v4bf __attribute__((vector_size(8)));
9+
typedef __bf16 v8bf __attribute__((vector_size(16)));
10+
typedef __bf16 v16bf __attribute__((vector_size(32)));
11+
12+
/* Convert the four __bf16 elements of A to float.  B is unused.  */
v4sf
13+
foo (v4bf b, v4bf a)
14+
{
15+
return __builtin_convertvector (a, v4sf);
16+
}
17+
18+
/* Convert the eight __bf16 elements of A to float.  B is unused.  */
v8sf
19+
foo2 (v8bf b, v8bf a)
20+
{
21+
return __builtin_convertvector (a, v8sf);
22+
}
23+
24+
/* Convert the sixteen __bf16 elements of A to float.  B is unused.  */
v16sf
25+
foo3 (v16bf b, v16bf a)
26+
{
27+
return __builtin_convertvector (a, v16sf);
28+
}
29+
30+
/* Same conversion as foo, with the v4bf source read through pointer A.  */
v4sf
31+
foo_mem (v4bf* a)
32+
{
33+
return __builtin_convertvector (*a, v4sf);
34+
}
35+
36+
/* Same conversion as foo2, with the v8bf source read through pointer A.  */
v8sf
37+
foo2_mem (v8bf* a)
38+
{
39+
return __builtin_convertvector (*a, v8sf);
40+
}
41+
42+
/* Same conversion as foo3, with the v16bf source read through pointer A.  */
v16sf
43+
foo3_mem (v16bf* a)
44+
{
45+
return __builtin_convertvector (*a, v16sf);
46+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/* { dg-do compile } */
2+
/* { dg-options "-msse2 -O2" } */
3+
/* { dg-final { scan-assembler-times {(?n)(?:vpermi2w|punpcklwd)} 2 { target { ! ia32 } } } } */
4+
5+
typedef float v2sf __attribute__((vector_size(8)));
6+
typedef __bf16 v2bf __attribute__((vector_size(4)));
7+
8+
/* Convert the two __bf16 elements of A to float.  B is unused.  */
v2sf
9+
foo (v2bf b, v2bf a)
10+
{
11+
return __builtin_convertvector (a, v2sf);
12+
}
13+
14+
15+
/* Same conversion as foo, with the v2bf source read through pointer A.  */
v2sf
16+
foo_mem (v2bf* a)
17+
{
18+
return __builtin_convertvector (*a, v2sf);
19+
}
20+

0 commit comments

Comments
 (0)