Skip to content

Commit d7d7c3b

Browse files
committed
[SelectionDAG][X86] Remove unused elements from atomic vector.
After splitting, all elements of the atomic vector are created and then placed back into a concat_vectors node. This change extends EltsFromConsecutiveLoads to understand AtomicSDNode, so that the concat_vectors can be mapped to a BUILD_VECTOR and unused elements are no longer referenced. commit-id:b83937a8
1 parent 0d766dd commit d7d7c3b

File tree

6 files changed

+69
-187
lines changed

6 files changed

+69
-187
lines changed

llvm/include/llvm/CodeGen/SelectionDAG.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1835,7 +1835,7 @@ class SelectionDAG {
18351835
/// chain to the token factor. This ensures that the new memory node will have
18361836
/// the same relative memory dependency position as the old load. Returns the
18371837
/// new merged load chain.
1838-
SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
1838+
SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp);
18391839

18401840
/// Topological-sort the AllNodes list and
18411841
/// assign a unique node id for each node in the DAG based on their
@@ -2259,7 +2259,7 @@ class SelectionDAG {
22592259
/// merged. Check that both are nonvolatile and if LD is loading
22602260
/// 'Bytes' bytes from a location that is 'Dist' units away from the
22612261
/// location that the 'Base' load is loading from.
2262-
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base,
2262+
bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base,
22632263
unsigned Bytes, int Dist) const;
22642264

22652265
/// Infer alignment of a load / store address. Return std::nullopt if it

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12121,7 +12121,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
1212112121
return TokenFactor;
1212212122
}
1212312123

12124-
SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
12124+
SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad,
1212512125
SDValue NewMemOp) {
1212612126
assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
1212712127
SDValue OldChain = SDValue(OldLoad, 1);
@@ -12814,17 +12814,21 @@ std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp(
1281412814
getBuildVector(NewOvVT, dl, OvScalars));
1281512815
}
1281612816

12817-
bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
12818-
LoadSDNode *Base,
12817+
bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD,
12818+
MemSDNode *Base,
1281912819
unsigned Bytes,
1282012820
int Dist) const {
1282112821
if (LD->isVolatile() || Base->isVolatile())
1282212822
return false;
12823-
// TODO: probably too restrictive for atomics, revisit
12824-
if (!LD->isSimple())
12825-
return false;
12826-
if (LD->isIndexed() || Base->isIndexed())
12827-
return false;
12823+
if (auto Ld = dyn_cast<LoadSDNode>(LD)) {
12824+
if (!Ld->isSimple())
12825+
return false;
12826+
if (Ld->isIndexed())
12827+
return false;
12828+
}
12829+
if (auto Ld = dyn_cast<LoadSDNode>(Base))
12830+
if (Ld->isIndexed())
12831+
return false;
1282812832
if (LD->getChain() != Base->getChain())
1282912833
return false;
1283012834
EVT VT = LD->getMemoryVT();

llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize,
195195
}
196196

197197
/// Parses tree in Ptr for base, index, offset addresses.
198-
static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
199-
const SelectionDAG &DAG) {
198+
template <typename T>
199+
static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) {
200200
SDValue Ptr = N->getBasePtr();
201201

202202
// (((B + I*M) + c)) + c ...
@@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
206206
bool IsIndexSignExt = false;
207207

208208
// pre-inc/pre-dec ops are components of EA.
209-
if (N->getAddressingMode() == ISD::PRE_INC) {
210-
if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
211-
Offset += C->getSExtValue();
212-
else // If unknown, give up now.
213-
return BaseIndexOffset(SDValue(), SDValue(), 0, false);
214-
} else if (N->getAddressingMode() == ISD::PRE_DEC) {
215-
if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
216-
Offset -= C->getSExtValue();
217-
else // If unknown, give up now.
218-
return BaseIndexOffset(SDValue(), SDValue(), 0, false);
209+
if constexpr (std::is_same_v<T, LSBaseSDNode>) {
210+
if (N->getAddressingMode() == ISD::PRE_INC) {
211+
if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
212+
Offset += C->getSExtValue();
213+
else // If unknown, give up now.
214+
return BaseIndexOffset(SDValue(), SDValue(), 0, false);
215+
} else if (N->getAddressingMode() == ISD::PRE_DEC) {
216+
if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
217+
Offset -= C->getSExtValue();
218+
else // If unknown, give up now.
219+
return BaseIndexOffset(SDValue(), SDValue(), 0, false);
220+
}
219221
}
220222

221223
// Consume constant adds & ors with appropriate masking.
@@ -300,8 +302,10 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
300302

301303
BaseIndexOffset BaseIndexOffset::match(const SDNode *N,
302304
const SelectionDAG &DAG) {
305+
if (const auto *AN = dyn_cast<AtomicSDNode>(N))
306+
return matchSDNode(AN, DAG);
303307
if (const auto *LS0 = dyn_cast<LSBaseSDNode>(N))
304-
return matchLSNode(LS0, DAG);
308+
return matchSDNode(LS0, DAG);
305309
if (const auto *LN = dyn_cast<LifetimeSDNode>(N)) {
306310
if (LN->hasOffset())
307311
return BaseIndexOffset(LN->getOperand(1), SDValue(), LN->getOffset(),

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5227,7 +5227,11 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
52275227
L = DAG.getPtrExtOrTrunc(L, dl, VT);
52285228

52295229
setValue(&I, L);
5230-
DAG.setRoot(OutChain);
5230+
5231+
if (VT.isVector())
5232+
DAG.setRoot(InChain);
5233+
else
5234+
DAG.setRoot(OutChain);
52315235
}
52325236

52335237
void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7122,15 +7122,19 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
71227122
}
71237123

71247124
// Recurse to find a LoadSDNode source and the accumulated ByteOffset.
7125-
static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) {
7126-
if (ISD::isNON_EXTLoad(Elt.getNode())) {
7127-
auto *BaseLd = cast<LoadSDNode>(Elt);
7128-
if (!BaseLd->isSimple())
7129-
return false;
7125+
static bool findEltLoadSrc(SDValue Elt, MemSDNode *&Ld, int64_t &ByteOffset) {
7126+
if (auto *BaseLd = dyn_cast<AtomicSDNode>(Elt)) {
71307127
Ld = BaseLd;
71317128
ByteOffset = 0;
71327129
return true;
7133-
}
7130+
} else if (auto *BaseLd = dyn_cast<LoadSDNode>(Elt))
7131+
if (ISD::isNON_EXTLoad(Elt.getNode())) {
7132+
if (!BaseLd->isSimple())
7133+
return false;
7134+
Ld = BaseLd;
7135+
ByteOffset = 0;
7136+
return true;
7137+
}
71347138

71357139
switch (Elt.getOpcode()) {
71367140
case ISD::BITCAST:
@@ -7183,7 +7187,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
71837187
APInt ZeroMask = APInt::getZero(NumElems);
71847188
APInt UndefMask = APInt::getZero(NumElems);
71857189

7186-
SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr);
7190+
SmallVector<MemSDNode *, 8> Loads(NumElems, nullptr);
71877191
SmallVector<int64_t, 8> ByteOffsets(NumElems, 0);
71887192

71897193
// For each element in the initializer, see if we've found a load, zero or an
@@ -7233,7 +7237,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
72337237
EVT EltBaseVT = EltBase.getValueType();
72347238
assert(EltBaseVT.getSizeInBits() == EltBaseVT.getStoreSizeInBits() &&
72357239
"Register/Memory size mismatch");
7236-
LoadSDNode *LDBase = Loads[FirstLoadedElt];
7240+
MemSDNode *LDBase = Loads[FirstLoadedElt];
72377241
assert(LDBase && "Did not find base load for merging consecutive loads");
72387242
unsigned BaseSizeInBits = EltBaseVT.getStoreSizeInBits();
72397243
unsigned BaseSizeInBytes = BaseSizeInBits / 8;
@@ -7247,8 +7251,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
72477251

72487252
// Check to see if the element's load is consecutive to the base load
72497253
// or offset from a previous (already checked) load.
7250-
auto CheckConsecutiveLoad = [&](LoadSDNode *Base, int EltIdx) {
7251-
LoadSDNode *Ld = Loads[EltIdx];
7254+
auto CheckConsecutiveLoad = [&](MemSDNode *Base, int EltIdx) {
7255+
MemSDNode *Ld = Loads[EltIdx];
72527256
int64_t ByteOffset = ByteOffsets[EltIdx];
72537257
if (ByteOffset && (ByteOffset % BaseSizeInBytes) == 0) {
72547258
int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes);
@@ -7276,7 +7280,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
72767280
}
72777281
}
72787282

7279-
auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {
7283+
auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, MemSDNode *LDBase) {
72807284
auto MMOFlags = LDBase->getMemOperand()->getFlags();
72817285
assert(LDBase->isSimple() &&
72827286
"Cannot merge volatile or atomic loads.");
@@ -9319,8 +9323,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
93199323
{
93209324
SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
93219325
if (SDValue LD =
9322-
EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false))
9326+
EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false)) {
93239327
return LD;
9328+
}
93249329
}
93259330

93269331
// If this is a splat of pairs of 32-bit elements, we can use a narrower

llvm/test/CodeGen/X86/atomic-load-store.ll

Lines changed: 16 additions & 151 deletions
Original file line numberDiff line numberDiff line change
@@ -205,63 +205,19 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
205205
}
206206

207207
define <2 x half> @atomic_vec2_half(ptr %x) {
208-
; CHECK3-LABEL: atomic_vec2_half:
209-
; CHECK3: ## %bb.0:
210-
; CHECK3-NEXT: movl (%rdi), %eax
211-
; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
212-
; CHECK3-NEXT: shrl $16, %eax
213-
; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
214-
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
215-
; CHECK3-NEXT: retq
216-
;
217-
; CHECK0-LABEL: atomic_vec2_half:
218-
; CHECK0: ## %bb.0:
219-
; CHECK0-NEXT: movl (%rdi), %eax
220-
; CHECK0-NEXT: movl %eax, %ecx
221-
; CHECK0-NEXT: shrl $16, %ecx
222-
; CHECK0-NEXT: movw %cx, %dx
223-
; CHECK0-NEXT: ## implicit-def: $ecx
224-
; CHECK0-NEXT: movw %dx, %cx
225-
; CHECK0-NEXT: ## implicit-def: $xmm1
226-
; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
227-
; CHECK0-NEXT: movw %ax, %cx
228-
; CHECK0-NEXT: ## implicit-def: $eax
229-
; CHECK0-NEXT: movw %cx, %ax
230-
; CHECK0-NEXT: ## implicit-def: $xmm0
231-
; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
232-
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
233-
; CHECK0-NEXT: retq
208+
; CHECK-LABEL: atomic_vec2_half:
209+
; CHECK: ## %bb.0:
210+
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
211+
; CHECK-NEXT: retq
234212
%ret = load atomic <2 x half>, ptr %x acquire, align 4
235213
ret <2 x half> %ret
236214
}
237215

238216
define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) {
239-
; CHECK3-LABEL: atomic_vec2_bfloat:
240-
; CHECK3: ## %bb.0:
241-
; CHECK3-NEXT: movl (%rdi), %eax
242-
; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
243-
; CHECK3-NEXT: shrl $16, %eax
244-
; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
245-
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
246-
; CHECK3-NEXT: retq
247-
;
248-
; CHECK0-LABEL: atomic_vec2_bfloat:
249-
; CHECK0: ## %bb.0:
250-
; CHECK0-NEXT: movl (%rdi), %eax
251-
; CHECK0-NEXT: movl %eax, %ecx
252-
; CHECK0-NEXT: shrl $16, %ecx
253-
; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
254-
; CHECK0-NEXT: movw %ax, %dx
255-
; CHECK0-NEXT: ## implicit-def: $eax
256-
; CHECK0-NEXT: movw %dx, %ax
257-
; CHECK0-NEXT: ## implicit-def: $xmm0
258-
; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
259-
; CHECK0-NEXT: ## implicit-def: $eax
260-
; CHECK0-NEXT: movw %cx, %ax
261-
; CHECK0-NEXT: ## implicit-def: $xmm1
262-
; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
263-
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
264-
; CHECK0-NEXT: retq
217+
; CHECK-LABEL: atomic_vec2_bfloat:
218+
; CHECK: ## %bb.0:
219+
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
220+
; CHECK-NEXT: retq
265221
%ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
266222
ret <2 x bfloat> %ret
267223
}
@@ -439,110 +395,19 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
439395
}
440396

441397
define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
442-
; CHECK3-LABEL: atomic_vec4_half:
443-
; CHECK3: ## %bb.0:
444-
; CHECK3-NEXT: movq (%rdi), %rax
445-
; CHECK3-NEXT: movl %eax, %ecx
446-
; CHECK3-NEXT: shrl $16, %ecx
447-
; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
448-
; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
449-
; CHECK3-NEXT: movq %rax, %rcx
450-
; CHECK3-NEXT: shrq $32, %rcx
451-
; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
452-
; CHECK3-NEXT: shrq $48, %rax
453-
; CHECK3-NEXT: pinsrw $0, %eax, %xmm3
454-
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
455-
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
456-
; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
457-
; CHECK3-NEXT: retq
458-
;
459-
; CHECK0-LABEL: atomic_vec4_half:
460-
; CHECK0: ## %bb.0:
461-
; CHECK0-NEXT: movq (%rdi), %rax
462-
; CHECK0-NEXT: movl %eax, %ecx
463-
; CHECK0-NEXT: shrl $16, %ecx
464-
; CHECK0-NEXT: movw %cx, %dx
465-
; CHECK0-NEXT: ## implicit-def: $ecx
466-
; CHECK0-NEXT: movw %dx, %cx
467-
; CHECK0-NEXT: ## implicit-def: $xmm2
468-
; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
469-
; CHECK0-NEXT: movw %ax, %dx
470-
; CHECK0-NEXT: ## implicit-def: $ecx
471-
; CHECK0-NEXT: movw %dx, %cx
472-
; CHECK0-NEXT: ## implicit-def: $xmm0
473-
; CHECK0-NEXT: pinsrw $0, %ecx, %xmm0
474-
; CHECK0-NEXT: movq %rax, %rcx
475-
; CHECK0-NEXT: shrq $32, %rcx
476-
; CHECK0-NEXT: movw %cx, %dx
477-
; CHECK0-NEXT: ## implicit-def: $ecx
478-
; CHECK0-NEXT: movw %dx, %cx
479-
; CHECK0-NEXT: ## implicit-def: $xmm1
480-
; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
481-
; CHECK0-NEXT: shrq $48, %rax
482-
; CHECK0-NEXT: movw %ax, %cx
483-
; CHECK0-NEXT: ## implicit-def: $eax
484-
; CHECK0-NEXT: movw %cx, %ax
485-
; CHECK0-NEXT: ## implicit-def: $xmm3
486-
; CHECK0-NEXT: pinsrw $0, %eax, %xmm3
487-
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
488-
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
489-
; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
490-
; CHECK0-NEXT: retq
398+
; CHECK-LABEL: atomic_vec4_half:
399+
; CHECK: ## %bb.0:
400+
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
401+
; CHECK-NEXT: retq
491402
%ret = load atomic <4 x half>, ptr %x acquire, align 8
492403
ret <4 x half> %ret
493404
}
494405

495406
define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind {
496-
; CHECK3-LABEL: atomic_vec4_bfloat:
497-
; CHECK3: ## %bb.0:
498-
; CHECK3-NEXT: movq (%rdi), %rax
499-
; CHECK3-NEXT: movq %rax, %rcx
500-
; CHECK3-NEXT: movq %rax, %rdx
501-
; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
502-
; CHECK3-NEXT: ## kill: def $eax killed $eax killed $rax
503-
; CHECK3-NEXT: shrl $16, %eax
504-
; CHECK3-NEXT: shrq $32, %rcx
505-
; CHECK3-NEXT: shrq $48, %rdx
506-
; CHECK3-NEXT: pinsrw $0, %edx, %xmm1
507-
; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
508-
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
509-
; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
510-
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
511-
; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
512-
; CHECK3-NEXT: retq
513-
;
514-
; CHECK0-LABEL: atomic_vec4_bfloat:
515-
; CHECK0: ## %bb.0:
516-
; CHECK0-NEXT: movq (%rdi), %rax
517-
; CHECK0-NEXT: movl %eax, %ecx
518-
; CHECK0-NEXT: shrl $16, %ecx
519-
; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
520-
; CHECK0-NEXT: movw %ax, %dx
521-
; CHECK0-NEXT: movq %rax, %rsi
522-
; CHECK0-NEXT: shrq $32, %rsi
523-
; CHECK0-NEXT: ## kill: def $si killed $si killed $rsi
524-
; CHECK0-NEXT: shrq $48, %rax
525-
; CHECK0-NEXT: movw %ax, %di
526-
; CHECK0-NEXT: ## implicit-def: $eax
527-
; CHECK0-NEXT: movw %di, %ax
528-
; CHECK0-NEXT: ## implicit-def: $xmm0
529-
; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
530-
; CHECK0-NEXT: ## implicit-def: $eax
531-
; CHECK0-NEXT: movw %si, %ax
532-
; CHECK0-NEXT: ## implicit-def: $xmm1
533-
; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
534-
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
535-
; CHECK0-NEXT: ## implicit-def: $eax
536-
; CHECK0-NEXT: movw %dx, %ax
537-
; CHECK0-NEXT: ## implicit-def: $xmm0
538-
; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
539-
; CHECK0-NEXT: ## implicit-def: $eax
540-
; CHECK0-NEXT: movw %cx, %ax
541-
; CHECK0-NEXT: ## implicit-def: $xmm2
542-
; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
543-
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
544-
; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
545-
; CHECK0-NEXT: retq
407+
; CHECK-LABEL: atomic_vec4_bfloat:
408+
; CHECK: ## %bb.0:
409+
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
410+
; CHECK-NEXT: retq
546411
%ret = load atomic <4 x bfloat>, ptr %x acquire, align 8
547412
ret <4 x bfloat> %ret
548413
}

0 commit comments

Comments
 (0)