Skip to content

Commit

Permalink
Don't assume it's AVX_U128_CLEAN after call_insn whose abi.mode_clobb…
Browse files Browse the repository at this point in the history
…er(V4DImode) deosn't contains all SSE_REGS.

If the function desn't clobber any sse registers or only clobber
128-bit part, then vzeroupper isn't issued before the function exit.
the status not CLEAN but ANY after the function.

Also for sibling_call, it's safe to issue an vzeroupper. Also there
could be missing vzeroupper since there's no mode_exit for
sibling_call_p.

gcc/ChangeLog:

	PR target/112891
	* config/i386/i386.cc (ix86_avx_u128_mode_after): Return
	AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to
	align with ix86_avx_u128_mode_needed.
	(ix86_avx_u128_mode_needed): Return AVX_U128_ClEAN for
	sibling_call.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr112891.c: New test.
	* gcc.target/i386/pr112891-2.c: New test.
  • Loading branch information
algebra84 committed Dec 12, 2023
1 parent d965335 commit fc62716
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 3 deletions.
22 changes: 19 additions & 3 deletions gcc/config/i386/i386.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15038,8 +15038,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
vzeroupper if all SSE registers are clobbered. */
const function_abi &abi = insn_callee_abi (insn);
if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
|| !hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
abi.mode_clobbers (V4DImode)))
/* Should be safe to issue an vzeroupper before sibling_call_p.
Also there not mode_exit for sibling_call, so there could be
missing vzeroupper for that. */
|| !(SIBLING_CALL_P (insn)
|| hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
abi.mode_clobbers (V4DImode))))
return AVX_U128_ANY;

return AVX_U128_CLEAN;
Expand Down Expand Up @@ -15177,7 +15181,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
bool avx_upper_reg_found = false;
note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);

return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
if (avx_upper_reg_found)
return AVX_U128_DIRTY;

/* If the function desn't clobber any sse registers or only clobber
128-bit part, Then vzeroupper isn't issued before the function exit.
the status not CLEAN but ANY after the function. */
const function_abi &abi = insn_callee_abi (insn);
if (!(SIBLING_CALL_P (insn)
|| hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
abi.mode_clobbers (V4DImode))))
return AVX_U128_ANY;

return AVX_U128_CLEAN;
}

/* Otherwise, return current mode. Remember that if insn
Expand Down
30 changes: 30 additions & 0 deletions gcc/testsuite/gcc.target/i386/pr112891-2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/* { dg-do compile } */
/* { dg-options "-mavx2 -O3" } */
/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */

void
__attribute__((noinline))
bar (double* a)
{
a[0] = 1.0;
a[1] = 2.0;
}

double
__attribute__((noinline))
foo (double* __restrict a, double* b)
{
a[0] += b[0];
a[1] += b[1];
a[2] += b[2];
a[3] += b[3];
bar (b);
return a[5] + b[5];
}

double
foo1 (double* __restrict a, double* b)
{
double c = foo (a, b);
return __builtin_exp (c);
}
29 changes: 29 additions & 0 deletions gcc/testsuite/gcc.target/i386/pr112891.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/* { dg-do compile } */
/* { dg-options "-mavx2 -O3" } */
/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */

void
__attribute__((noinline))
bar (double* a)
{
a[0] = 1.0;
a[1] = 2.0;
}

void
__attribute__((noinline))
foo (double* __restrict a, double* b)
{
a[0] += b[0];
a[1] += b[1];
a[2] += b[2];
a[3] += b[3];
bar (b);
}

double
foo1 (double* __restrict a, double* b)
{
foo (a, b);
return __builtin_exp (b[1]);
}

0 comments on commit fc62716

Please sign in to comment.