Skip to content

Commit 25e3c7c

Browse files
author
Jenkins
committed
SWDEV-179954 - OpenCL/LC - Merge branch amd-master into amd-common
Change-Id: I0d15ef6392dcbba4bb566f2ca0f55cf9d485fa56
2 parents 8e3c9a7 + ccc4d07 commit 25e3c7c

20 files changed

+520
-536
lines changed

lib/Target/WebAssembly/WebAssemblyCFGSort.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -259,10 +259,10 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
259259
CompareBlockNumbersBackwards>
260260
Ready;
261261

262-
RegionInfo SUI(MLI, WEI);
262+
RegionInfo RI(MLI, WEI);
263263
SmallVector<Entry, 4> Entries;
264264
for (MachineBasicBlock *MBB = &MF.front();;) {
265-
const Region *R = SUI.getRegionFor(MBB);
265+
const Region *R = RI.getRegionFor(MBB);
266266
if (R) {
267267
// If MBB is a region header, add it to the active region list. We can't
268268
// put any blocks that it doesn't dominate until we see the end of the
@@ -353,7 +353,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
353353

354354
for (auto &MBB : MF) {
355355
assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");
356-
const Region *Region = SUI.getRegionFor(&MBB);
356+
const Region *Region = RI.getRegionFor(&MBB);
357357

358358
if (Region && &MBB == Region->getHeader()) {
359359
if (Region->isLoop()) {
@@ -378,7 +378,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
378378
for (auto Pred : MBB.predecessors())
379379
assert(Pred->getNumber() < MBB.getNumber() &&
380380
"Non-loop-header predecessors should be topologically sorted");
381-
assert(OnStack.count(SUI.getRegionFor(&MBB)) &&
381+
assert(OnStack.count(RI.getRegionFor(&MBB)) &&
382382
"Blocks must be nested in their regions");
383383
}
384384
while (OnStack.size() > 1 && &MBB == WebAssembly::getBottom(OnStack.back()))

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 33 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -30885,33 +30885,39 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
3088530885

3088630886
// Match against a ZERO_EXTEND_VECTOR_INREG/VZEXT instruction.
3088730887
// TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
30888-
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
30889-
(MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
30890-
unsigned MaxScale = 64 / MaskEltSize;
30891-
for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
30892-
bool Match = true;
30893-
unsigned NumDstElts = NumMaskElts / Scale;
30894-
for (unsigned i = 0; i != NumDstElts && Match; ++i) {
30895-
Match &= isUndefOrEqual(Mask[i * Scale], (int)i);
30896-
Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
30897-
}
30898-
if (Match) {
30899-
unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
30900-
MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType() :
30901-
MVT::getIntegerVT(MaskEltSize);
30902-
SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);
30903-
30904-
if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits())
30905-
V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
30906-
30907-
if (SrcVT.getVectorNumElements() == NumDstElts)
30908-
Shuffle = unsigned(ISD::ZERO_EXTEND);
30909-
else
30910-
Shuffle = unsigned(ISD::ZERO_EXTEND_VECTOR_INREG);
30888+
if ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
30889+
(MaskVT.is256BitVector() && Subtarget.hasInt256())) {
30890+
// Allow this with FloatDomain if we'll be able to fold the load.
30891+
SDValue BC1 = peekThroughOneUseBitcasts(V1);
30892+
if (AllowIntDomain ||
30893+
(BC1.hasOneUse() && BC1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
30894+
MayFoldLoad(BC1.getOperand(0)))) {
30895+
unsigned MaxScale = 64 / MaskEltSize;
30896+
for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
30897+
bool Match = true;
30898+
unsigned NumDstElts = NumMaskElts / Scale;
30899+
for (unsigned i = 0; i != NumDstElts && Match; ++i) {
30900+
Match &= isUndefOrEqual(Mask[i * Scale], (int)i);
30901+
Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
30902+
}
30903+
if (Match) {
30904+
unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
30905+
MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType()
30906+
: MVT::getIntegerVT(MaskEltSize);
30907+
SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);
3091130908

30912-
DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
30913-
DstVT = MVT::getVectorVT(DstVT, NumDstElts);
30914-
return true;
30909+
if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits())
30910+
V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
30911+
30912+
if (SrcVT.getVectorNumElements() == NumDstElts)
30913+
Shuffle = unsigned(ISD::ZERO_EXTEND);
30914+
else
30915+
Shuffle = unsigned(ISD::ZERO_EXTEND_VECTOR_INREG);
30916+
30917+
DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
30918+
DstVT = MVT::getVectorVT(DstVT, NumDstElts);
30919+
return true;
30920+
}
3091530921
}
3091630922
}
3091730923
}
@@ -42616,8 +42622,7 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
4261642622
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, In.getOperand(0));
4261742623

4261842624
// Attempt to combine as a shuffle.
42619-
// TODO: SSE41 support
42620-
if (Subtarget.hasAVX() && N->getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
42625+
if (Subtarget.hasSSE41() && N->getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
4262142626
SDValue Op(N, 0);
4262242627
if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))
4262342628
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))

lib/Target/X86/X86InstrArithmetic.td

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -605,13 +605,13 @@ def Xi8 : X86TypeInfo<i8, "b", GR8, loadi8, i8mem,
605605
Imm8, i8imm, relocImm8_su, i8imm, invalid_node,
606606
0, OpSizeFixed, 0>;
607607
def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
608-
Imm16, i16imm, relocImm16_su, i16i8imm, i16immSExt8,
608+
Imm16, i16imm, relocImm16_su, i16i8imm, i16immSExt8_su,
609609
1, OpSize16, 0>;
610610
def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
611-
Imm32, i32imm, relocImm32_su, i32i8imm, i32immSExt8,
611+
Imm32, i32imm, relocImm32_su, i32i8imm, i32immSExt8_su,
612612
1, OpSize32, 0>;
613613
def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
614-
Imm32S, i64i32imm, i64relocImmSExt32_su, i64i8imm, i64immSExt8,
614+
Imm32S, i64i32imm, i64relocImmSExt32_su, i64i8imm, i64immSExt8_su,
615615
1, OpSizeFixed, 1>;
616616

617617
/// ITy - This instruction base class takes the type info for the instruction.

lib/Target/X86/X86InstrCompiler.td

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1994,8 +1994,8 @@ def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>;
19941994
def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;
19951995

19961996
// sub reg, relocImm
1997-
def : Pat<(X86sub_flag GR64:$src1, i64relocImmSExt8:$src2),
1998-
(SUB64ri8 GR64:$src1, i64relocImmSExt8:$src2)>;
1997+
def : Pat<(X86sub_flag GR64:$src1, i64relocImmSExt8_su:$src2),
1998+
(SUB64ri8 GR64:$src1, i64relocImmSExt8_su:$src2)>;
19991999

20002000
// mul reg, reg
20012001
def : Pat<(mul GR16:$src1, GR16:$src2),

lib/Target/X86/X86InstrInfo.td

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1004,6 +1004,19 @@ def relocImm32_su : PatLeaf<(i32 relocImm), [{
10041004
return !shouldAvoidImmediateInstFormsForSize(N);
10051005
}]>;
10061006

1007+
def i16immSExt8_su : PatLeaf<(i16immSExt8), [{
1008+
return !shouldAvoidImmediateInstFormsForSize(N);
1009+
}]>;
1010+
def i32immSExt8_su : PatLeaf<(i32immSExt8), [{
1011+
return !shouldAvoidImmediateInstFormsForSize(N);
1012+
}]>;
1013+
def i64immSExt8_su : PatLeaf<(i64immSExt8), [{
1014+
return !shouldAvoidImmediateInstFormsForSize(N);
1015+
}]>;
1016+
1017+
def i64relocImmSExt8_su : PatLeaf<(i64relocImmSExt8), [{
1018+
return !shouldAvoidImmediateInstFormsForSize(N);
1019+
}]>;
10071020
def i64relocImmSExt32_su : PatLeaf<(i64relocImmSExt32), [{
10081021
return !shouldAvoidImmediateInstFormsForSize(N);
10091022
}]>;

test/CodeGen/X86/cast-vsel.ll

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -85,9 +85,10 @@ define <8 x i32> @zext(<8 x float> %a, <8 x float> %b, <8 x i16> %c, <8 x i16> %
8585
; SSE41-NEXT: cmpltps %xmm2, %xmm0
8686
; SSE41-NEXT: packssdw %xmm1, %xmm0
8787
; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm5
88+
; SSE41-NEXT: pxor %xmm1, %xmm1
8889
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero
89-
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm5[2,3,0,1]
90-
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
90+
; SSE41-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
91+
; SSE41-NEXT: movdqa %xmm5, %xmm1
9192
; SSE41-NEXT: retq
9293
;
9394
; AVX1-LABEL: zext:

test/CodeGen/X86/combine-pmuldq.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -91,16 +91,16 @@ define <4 x i64> @combine_shuffle_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1)
9191
define <8 x i64> @combine_zext_pmuludq_256(<8 x i32> %a) {
9292
; SSE-LABEL: combine_zext_pmuludq_256:
9393
; SSE: # %bb.0:
94-
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
95-
; SSE-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
96-
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
97-
; SSE-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero
94+
; SSE-NEXT: movdqa %xmm0, %xmm4
95+
; SSE-NEXT: pxor %xmm3, %xmm3
9896
; SSE-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
97+
; SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
9998
; SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
100-
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [715827883,715827883]
101-
; SSE-NEXT: pmuludq %xmm1, %xmm0
102-
; SSE-NEXT: pmuludq %xmm1, %xmm2
103-
; SSE-NEXT: pmuludq %xmm1, %xmm4
99+
; SSE-NEXT: punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm3[2],xmm4[3],xmm3[3]
100+
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [715827883,715827883]
101+
; SSE-NEXT: pmuludq %xmm3, %xmm0
102+
; SSE-NEXT: pmuludq %xmm3, %xmm4
103+
; SSE-NEXT: pmuludq %xmm3, %xmm2
104104
; SSE-NEXT: pmuludq %xmm1, %xmm3
105105
; SSE-NEXT: movdqa %xmm4, %xmm1
106106
; SSE-NEXT: retq

test/CodeGen/X86/combine-shl.ll

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -316,11 +316,11 @@ define <8 x i32> @combine_vec_shl_zext_lshr0(<8 x i16> %x) {
316316
;
317317
; SSE41-LABEL: combine_vec_shl_zext_lshr0:
318318
; SSE41: # %bb.0:
319-
; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
320-
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
321-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
322-
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
323-
; SSE41-NEXT: movdqa %xmm2, %xmm0
319+
; SSE41-NEXT: movdqa %xmm0, %xmm1
320+
; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
321+
; SSE41-NEXT: pxor %xmm2, %xmm2
322+
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
323+
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
324324
; SSE41-NEXT: retq
325325
;
326326
; AVX-LABEL: combine_vec_shl_zext_lshr0:
@@ -362,12 +362,13 @@ define <8 x i32> @combine_vec_shl_zext_lshr1(<8 x i16> %x) {
362362
;
363363
; SSE41-LABEL: combine_vec_shl_zext_lshr1:
364364
; SSE41: # %bb.0:
365-
; SSE41-NEXT: pmulhuw {{.*}}(%rip), %xmm0
366-
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
367-
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
368-
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
369-
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
365+
; SSE41-NEXT: movdqa %xmm0, %xmm1
366+
; SSE41-NEXT: pmulhuw {{.*}}(%rip), %xmm1
367+
; SSE41-NEXT: pxor %xmm2, %xmm2
368+
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
369+
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
370370
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
371+
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
371372
; SSE41-NEXT: retq
372373
;
373374
; AVX-LABEL: combine_vec_shl_zext_lshr1:

test/CodeGen/X86/immediate_merging.ll

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,9 @@ define i32 @foo() optsize {
1919
; X86-NEXT: movl $1234, %eax # imm = 0x4D2
2020
; X86-NEXT: movl %eax, a
2121
; X86-NEXT: movl %eax, b
22-
; X86-NEXT: movl $12, c
23-
; X86-NEXT: cmpl $12, e
22+
; X86-NEXT: movl $12, %eax
23+
; X86-NEXT: movl %eax, c
24+
; X86-NEXT: cmpl %eax, e
2425
; X86-NEXT: jne .LBB0_2
2526
; X86-NEXT: # %bb.1: # %if.then
2627
; X86-NEXT: movl $1, x
@@ -37,8 +38,9 @@ define i32 @foo() optsize {
3738
; X64-NEXT: movl $1234, %eax # imm = 0x4D2
3839
; X64-NEXT: movl %eax, {{.*}}(%rip)
3940
; X64-NEXT: movl %eax, {{.*}}(%rip)
40-
; X64-NEXT: movl $12, {{.*}}(%rip)
41-
; X64-NEXT: cmpl $12, {{.*}}(%rip)
41+
; X64-NEXT: movl $12, %eax
42+
; X64-NEXT: movl %eax, {{.*}}(%rip)
43+
; X64-NEXT: cmpl %eax, {{.*}}(%rip)
4244
; X64-NEXT: jne .LBB0_2
4345
; X64-NEXT: # %bb.1: # %if.then
4446
; X64-NEXT: movl $1, {{.*}}(%rip)

test/CodeGen/X86/immediate_merging64.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ define i1 @imm_multiple_users(i64 %a, i64* %b) optsize {
1111
; CHECK: # %bb.0:
1212
; CHECK-NEXT: movq $-1, %rax
1313
; CHECK-NEXT: movq %rax, (%rsi)
14-
; CHECK-NEXT: cmpq $-1, %rdi
14+
; CHECK-NEXT: cmpq %rax, %rdi
1515
; CHECK-NEXT: sete %al
1616
; CHECK-NEXT: retq
1717
store i64 -1, i64* %b, align 8

0 commit comments

Comments (0)