Skip to content

Commit f916347

Browse files
committed
[SelectionDAG] Split vector types for atomic load
Vector types that aren't widened are split so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate atomic loads of vectors whose element type is bfloat or half. commit-id:3a045357
1 parent 109bc60 commit f916347

File tree

3 files changed

+124
-0
lines changed

3 files changed

+124
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -965,6 +965,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
965965
void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
966966
void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
967967
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
968+
void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi);
968969
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
969970
void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
970971
void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,6 +1161,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
11611161
SplitVecRes_STEP_VECTOR(N, Lo, Hi);
11621162
break;
11631163
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
1164+
case ISD::ATOMIC_LOAD:
1165+
SplitVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N), Lo, Hi);
1166+
break;
11641167
case ISD::LOAD:
11651168
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
11661169
break;
@@ -1414,6 +1417,40 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
14141417
SetSplitVector(SDValue(N, ResNo), Lo, Hi);
14151418
}
14161419

1420+
// Split the vector result of an ATOMIC_LOAD into its Lo and Hi halves.
//
// The vector is not loaded as two separate halves. Instead one ATOMIC_LOAD of
// an integer as wide as the whole vector is emitted, and the two halves are
// carved out of that integer with shift/truncate and bitcast back to the
// split vector types.
//
// \param LD  The atomic load whose vector result (value 0) is being split.
// \param Lo  Receives the low-numbered elements, of the Lo type returned by
//            GetSplitDestVTs.
// \param Hi  Receives the high-numbered elements, of the Hi type returned by
//            GetSplitDestVTs.
//
// The chain result (value 1) of LD is replaced by the chain of the new load.
void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo,
                                               SDValue &Hi) {
  assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
         "Extended load during type legalization!");
  SDLoc dl(LD);
  EVT VT = LD->getValueType(0);
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);

  SDValue Ch = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // Load the entire vector as one integer so only one ATOMIC_LOAD is issued.
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
  EVT MemIntVT =
      EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits());
  SDValue ALD = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, MemIntVT, IntVT, Ch,
                                  Ptr, LD->getMemOperand());

  // The two halves need not be equally wide, so derive the shift amount from
  // the actual half widths rather than from half of the full width.
  const uint64_t LoBits = LoVT.getFixedSizeInBits();
  const uint64_t HiBits = HiVT.getFixedSizeInBits();
  EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoBits);
  EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiBits);

  // Lo holds the elements at the lower addresses. In the loaded integer those
  // are the least significant bits on little-endian targets and the most
  // significant bits on big-endian targets.
  const bool IsBigEndian = DAG.getDataLayout().isBigEndian();
  const uint64_t ShiftAmt = IsBigEndian ? HiBits : LoBits;
  SDValue Shifted = DAG.getNode(ISD::SRL, dl, IntVT, ALD,
                                DAG.getIntPtrConstant(ShiftAmt, dl));

  SDValue ExtractLo =
      DAG.getNode(ISD::TRUNCATE, dl, LoIntVT, IsBigEndian ? Shifted : ALD);
  SDValue ExtractHi =
      DAG.getNode(ISD::TRUNCATE, dl, HiIntVT, IsBigEndian ? ALD : Shifted);

  Lo = DAG.getBitcast(LoVT, ExtractLo);
  Hi = DAG.getBitcast(HiVT, ExtractHi);

  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1));
}
1453+
14171454
void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
14181455
MachinePointerInfo &MPI, SDValue &Ptr,
14191456
uint64_t *ScaledOffset) {

llvm/test/CodeGen/X86/atomic-load-store.ll

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,74 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
374374
ret <2 x float> %ret
375375
}
376376

377+
; Acquire atomic load of a <2 x half> vector with 4-byte alignment.
; For every prefix below the expected output is one load into xmm0
; (movss for the O3 prefixes, movd for the O0 prefixes) followed by retq.
define <2 x half> @atomic_vec2_half(ptr %x) {
378+
; CHECK-O3-LABEL: atomic_vec2_half:
379+
; CHECK-O3: # %bb.0:
380+
; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
381+
; CHECK-O3-NEXT: retq
382+
;
383+
; CHECK-SSE-O3-LABEL: atomic_vec2_half:
384+
; CHECK-SSE-O3: # %bb.0:
385+
; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
386+
; CHECK-SSE-O3-NEXT: retq
387+
;
388+
; CHECK-AVX-O3-LABEL: atomic_vec2_half:
389+
; CHECK-AVX-O3: # %bb.0:
390+
; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
391+
; CHECK-AVX-O3-NEXT: retq
392+
;
393+
; CHECK-O0-LABEL: atomic_vec2_half:
394+
; CHECK-O0: # %bb.0:
395+
; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
396+
; CHECK-O0-NEXT: retq
397+
;
398+
; CHECK-SSE-O0-LABEL: atomic_vec2_half:
399+
; CHECK-SSE-O0: # %bb.0:
400+
; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
401+
; CHECK-SSE-O0-NEXT: retq
402+
;
403+
; CHECK-AVX-O0-LABEL: atomic_vec2_half:
404+
; CHECK-AVX-O0: # %bb.0:
405+
; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
406+
; CHECK-AVX-O0-NEXT: retq
407+
%ret = load atomic <2 x half>, ptr %x acquire, align 4
408+
ret <2 x half> %ret
409+
}
410+
411+
; Acquire atomic load of a <2 x bfloat> vector with 4-byte alignment.
; For every prefix below the expected output is one load into xmm0
; (movss for the O3 prefixes, movd for the O0 prefixes) followed by retq.
define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) {
412+
; CHECK-O3-LABEL: atomic_vec2_bfloat:
413+
; CHECK-O3: # %bb.0:
414+
; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
415+
; CHECK-O3-NEXT: retq
416+
;
417+
; CHECK-SSE-O3-LABEL: atomic_vec2_bfloat:
418+
; CHECK-SSE-O3: # %bb.0:
419+
; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
420+
; CHECK-SSE-O3-NEXT: retq
421+
;
422+
; CHECK-AVX-O3-LABEL: atomic_vec2_bfloat:
423+
; CHECK-AVX-O3: # %bb.0:
424+
; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
425+
; CHECK-AVX-O3-NEXT: retq
426+
;
427+
; CHECK-O0-LABEL: atomic_vec2_bfloat:
428+
; CHECK-O0: # %bb.0:
429+
; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
430+
; CHECK-O0-NEXT: retq
431+
;
432+
; CHECK-SSE-O0-LABEL: atomic_vec2_bfloat:
433+
; CHECK-SSE-O0: # %bb.0:
434+
; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
435+
; CHECK-SSE-O0-NEXT: retq
436+
;
437+
; CHECK-AVX-O0-LABEL: atomic_vec2_bfloat:
438+
; CHECK-AVX-O0: # %bb.0:
439+
; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
440+
; CHECK-AVX-O0-NEXT: retq
441+
%ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
442+
ret <2 x bfloat> %ret
443+
}
444+
377445
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
378446
; CHECK-O3-LABEL: atomic_vec1_ptr:
379447
; CHECK-O3: # %bb.0:
@@ -835,6 +903,24 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
835903
ret <4 x i16> %ret
836904
}
837905

906+
; Acquire atomic load of a <4 x half> vector with 8-byte alignment.
; The shared expectation is one movq from (%rdi) into xmm0 followed by retq.
define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
907+
; CHECK-LABEL: atomic_vec4_half:
908+
; CHECK: # %bb.0:
909+
; CHECK-NEXT: movq (%rdi), %xmm0
910+
; CHECK-NEXT: retq
911+
%ret = load atomic <4 x half>, ptr %x acquire, align 8
912+
ret <4 x half> %ret
913+
}
914+
915+
; Acquire atomic load of a <4 x bfloat> vector with 8-byte alignment.
; The shared expectation is one movq from (%rdi) into xmm0 followed by retq.
define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind {
916+
; CHECK-LABEL: atomic_vec4_bfloat:
917+
; CHECK: # %bb.0:
918+
; CHECK-NEXT: movq (%rdi), %xmm0
919+
; CHECK-NEXT: retq
920+
%ret = load atomic <4 x bfloat>, ptr %x acquire, align 8
921+
ret <4 x bfloat> %ret
922+
}
923+
838924
define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
839925
; CHECK-O3-LABEL: atomic_vec4_float_align:
840926
; CHECK-O3: # %bb.0:

0 commit comments

Comments
 (0)