Skip to content

Commit 1ea201d

Browse files
authored
[WoA] Remove extra barriers after ARM LSE instructions with MSVC (#169596)
c9821ab added extra fences after sequentially consistent stores for compatibility with MSVC's seq_cst loads (ldr+dmb). These extra fences should not be needed for ARM LSE instructions that have both acquire+release semantics, which results in a two way barrier, and should be enough for sequential consistency. Fixes #162345 Change-Id: I9148c73d0dcf3bf1b18a0915f96cac71ac1800f2
1 parent 848094c commit 1ea201d

File tree

7 files changed

+9463
-3561
lines changed

7 files changed

+9463
-3561
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2246,6 +2246,14 @@ class LLVM_ABI TargetLoweringBase {
22462246
return false;
22472247
}
22482248

2249+
/// Whether AtomicExpandPass should automatically insert a seq_cst trailing
2250+
/// fence without reducing the ordering for this atomic store. Defaults to
2251+
/// false.
2252+
virtual bool
2253+
shouldInsertTrailingSeqCstFenceForAtomicStore(const Instruction *I) const {
2254+
return false;
2255+
}
2256+
22492257
// The memory ordering that AtomicExpandPass should assign to a atomic
22502258
// instruction that it has lowered by adding fences. This can be used
22512259
// to "fold" one of the fences into the atomic instruction.
@@ -2254,13 +2262,6 @@ class LLVM_ABI TargetLoweringBase {
22542262
return AtomicOrdering::Monotonic;
22552263
}
22562264

2257-
/// Whether AtomicExpandPass should automatically insert a trailing fence
2258-
/// without reducing the ordering for this atomic. Defaults to false.
2259-
virtual bool
2260-
shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const {
2261-
return false;
2262-
}
2263-
22642265
/// Perform a load-linked operation on Addr, returning a "Value *" with the
22652266
/// corresponding pointee type. This may entail some non-trivial operations to
22662267
/// truncate or reconstruct types that will be illegal in the backend. See

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -345,21 +345,13 @@ bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
345345
if (FenceOrdering != AtomicOrdering::Monotonic) {
346346
MadeChange |= bracketInstWithFences(I, FenceOrdering);
347347
}
348-
} else if (I->hasAtomicStore() &&
349-
TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
350-
auto FenceOrdering = AtomicOrdering::Monotonic;
351-
if (SI)
352-
FenceOrdering = SI->getOrdering();
353-
else if (RMWI)
354-
FenceOrdering = RMWI->getOrdering();
355-
else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
356-
TargetLoweringBase::AtomicExpansionKind::LLSC)
357-
// LLSC is handled in expandAtomicCmpXchg().
358-
FenceOrdering = CASI->getSuccessOrdering();
359-
348+
} else if (TLI->shouldInsertTrailingSeqCstFenceForAtomicStore(I) &&
349+
!(CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
350+
TargetLoweringBase::AtomicExpansionKind::LLSC)) {
351+
// CmpXchg LLSC is handled in expandAtomicCmpXchg().
360352
IRBuilder Builder(I);
361-
if (auto TrailingFence =
362-
TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
353+
if (auto TrailingFence = TLI->emitTrailingFence(
354+
Builder, I, AtomicOrdering::SequentiallyConsistent)) {
363355
TrailingFence->moveAfter(I);
364356
MadeChange = true;
365357
}
@@ -1512,7 +1504,7 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
15121504
// necessary.
15131505
Builder.SetInsertPoint(SuccessBB);
15141506
if (ShouldInsertFencesForAtomic ||
1515-
TLI->shouldInsertTrailingFenceForAtomicStore(CI))
1507+
TLI->shouldInsertTrailingSeqCstFenceForAtomicStore(CI))
15161508
TLI->emitTrailingFence(Builder, CI, SuccessOrder);
15171509
Builder.CreateBr(ExitBB);
15181510

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29655,7 +29655,7 @@ bool AArch64TargetLowering::shouldInsertFencesForAtomic(
2965529655
return false;
2965629656
}
2965729657

29658-
bool AArch64TargetLowering::shouldInsertTrailingFenceForAtomicStore(
29658+
bool AArch64TargetLowering::shouldInsertTrailingSeqCstFenceForAtomicStore(
2965929659
const Instruction *I) const {
2966029660
// Store-Release instructions only provide seq_cst guarantees when paired with
2966129661
// Load-Acquire instructions. MSVC CRT does not use these instructions to
@@ -29664,19 +29664,24 @@ bool AArch64TargetLowering::shouldInsertTrailingFenceForAtomicStore(
2966429664
if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
2966529665
return false;
2966629666

29667-
switch (I->getOpcode()) {
29668-
default:
29667+
if (auto *SI = dyn_cast<StoreInst>(I))
29668+
return SI->getOrdering() == AtomicOrdering::SequentiallyConsistent;
29669+
29670+
auto *CAS = dyn_cast<AtomicCmpXchgInst>(I);
29671+
auto *RMW = dyn_cast<AtomicRMWInst>(I);
29672+
// Not a store.
29673+
if (!CAS && !RMW)
2966929674
return false;
29670-
case Instruction::AtomicCmpXchg:
29671-
return cast<AtomicCmpXchgInst>(I)->getSuccessOrdering() ==
29672-
AtomicOrdering::SequentiallyConsistent;
29673-
case Instruction::AtomicRMW:
29674-
return cast<AtomicRMWInst>(I)->getOrdering() ==
29675-
AtomicOrdering::SequentiallyConsistent;
29676-
case Instruction::Store:
29677-
return cast<StoreInst>(I)->getOrdering() ==
29678-
AtomicOrdering::SequentiallyConsistent;
29679-
}
29675+
29676+
// Fence only needed for seq_cst.
29677+
if (CAS &&
29678+
CAS->getSuccessOrdering() != AtomicOrdering::SequentiallyConsistent)
29679+
return false;
29680+
if (RMW && RMW->getOrdering() != AtomicOrdering::SequentiallyConsistent)
29681+
return false;
29682+
29683+
// We do not need a fence if we have LSE atomics.
29684+
return !Subtarget->hasLSE();
2968029685
}
2968129686

2968229687
// Loads and stores less than 128-bits are already atomic; ones above that

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -349,8 +349,8 @@ class AArch64TargetLowering : public TargetLowering {
349349
bool isOpSuitableForLSE128(const Instruction *I) const;
350350
bool isOpSuitableForRCPC3(const Instruction *I) const;
351351
bool shouldInsertFencesForAtomic(const Instruction *I) const override;
352-
bool
353-
shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;
352+
bool shouldInsertTrailingSeqCstFenceForAtomicStore(
353+
const Instruction *I) const override;
354354

355355
TargetLoweringBase::AtomicExpansionKind
356356
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;

0 commit comments

Comments
 (0)