-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[PowerPC] Lowering support for EVL type VP_LOAD/VP_STORE #165910
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
RolandF77
wants to merge
5
commits into
llvm:main
Choose a base branch
from
RolandF77:lowervpins
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+274
−0
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
|
@llvm/pr-subscribers-backend-powerpc

Author: None (RolandF77)

Changes

Map EVL type VP_LOAD/VP_STORE for fixed length vectors to PPC load/store with length.

Full diff: https://github.com/llvm/llvm-project/pull/165910.diff

5 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 17f04d0fd05e8..f303d237e5cc2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -652,6 +652,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
+ if (Subtarget.isISA3_0() && isPPC64) {
+ setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
+ setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
+ setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
+ setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
+ setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
+ setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
+ setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
+ setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
+ }
+
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
@@ -11909,6 +11920,59 @@ SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
}
+static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left, // Returns Val shifted left: scaled by Bits / 8 and, when Left is set, moved into the top byte of its type.
+ SelectionDAG &DAG) {
+ SDLoc dl(Val);
+ EVT VT = Val->getValueType(0);
+ unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0; // When Left is set, shift the low 8 bits of Val up to the top 8 bits of VT; otherwise no extra shift.
+ unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8); // Trailing-zero count of Bits / 8, which is log2 of that value when it is a power of two.
+ SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT); // Both adjustments are folded into one shift amount.
+ return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
+}
+
+SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const { // Rewrites a VP_LOAD node as an INTRINSIC_W_CHAIN memory-intrinsic node for ppc_vsx_lxvl or ppc_vsx_lxvrl.
+ auto VPLD = cast<VPLoadSDNode>(Op);
+ bool Future = Subtarget.isISAFuture(); // Selects the lxvrl intrinsic below and disables the shift into the top byte.
+ SDLoc dl(Op);
+ assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) && // Operand 3 is treated as the mask and must be a constant all-ones splat; this is only enforced where assert is enabled.
+ "Mask predication not supported");
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4)); // Operand 4 (presumably the EVL -- confirm against VPLoadSDNode) is extended to the pointer type.
+ unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
+ unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits(); // Bit width of one element of the loaded vector type.
+ Len = AdjustLength(Len, EltBits, !Future, DAG); // Scale by the element size in bytes; move to the top byte only for the non-Future intrinsic.
+ SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32), // Operands: chain, intrinsic ID, operand 1 of the VP_LOAD (presumably the address -- confirm), adjusted length.
+ VPLD->getOperand(1), Len};
+ SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other); // Same first result type as the VP_LOAD, plus MVT::Other.
+ SDValue VPL =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys, Ops,
+ VPLD->getMemoryVT(), VPLD->getMemOperand()); // The memory VT and memory operand are carried over from the VP_LOAD node.
+ return VPL;
+}
+
+SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const { // Rewrites a VP_STORE node as an INTRINSIC_VOID memory-intrinsic node for ppc_vsx_stxvl or ppc_vsx_stxvrl.
+ auto VPST = cast<VPStoreSDNode>(Op);
+ assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) && // Operand 4 is treated as the mask and must be a constant all-ones splat; this is only enforced where assert is enabled.
+ "Mask predication not supported");
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDLoc dl(Op);
+ SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5)); // Operand 5 (presumably the EVL -- confirm against VPStoreSDNode) is extended to the pointer type.
+ unsigned EltBits =
+ Op->getOperand(1).getValueType().getScalarType().getSizeInBits(); // Bit width of one element of operand 1, the value that is bitcast and stored below.
+ bool Future = Subtarget.isISAFuture(); // Selects the stxvrl intrinsic below and disables the shift into the top byte.
+ unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
+ Len = AdjustLength(Len, EltBits, !Future, DAG); // Scale by the element size in bytes; move to the top byte only for the non-Future intrinsic.
+ SDValue Ops[] = { // Operands: chain, intrinsic ID, stored value, operand 2 of the VP_STORE (presumably the address -- confirm), adjusted length.
+ VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)), // The stored value is always bitcast to v4i32, whatever its original element type.
+ VPST->getOperand(2), Len};
+ SDVTList Tys = DAG.getVTList(MVT::Other); // The only result type is MVT::Other.
+ SDValue VPS =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
+ VPST->getMemoryVT(), VPST->getMemOperand()); // The memory VT and memory operand are carried over from the VP_STORE node.
+ return VPS;
+}
+
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -12763,6 +12827,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
if (Op->getFlags().hasNoFPExcept())
return Op;
return SDValue();
+ case ISD::VP_LOAD:
+ return LowerVP_LOAD(Op, DAG);
+ case ISD::VP_STORE:
+ return LowerVP_STORE(Op, DAG);
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 880aca751d7d6..d967018982734 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1345,6 +1345,9 @@ namespace llvm {
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 2fba090f2d501..6373343f2b2e3 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -24,6 +24,10 @@ using namespace llvm;
#define DEBUG_TYPE "ppctti"
+static cl::opt<bool> Pwr9EVL("ppc-pwr9-evl", // Hidden flag, false by default; when true, getVPLegalizationStrategy no longer rejects PPC::DIR_PWR9 at its CPU-directive check.
+ cl::desc("Allow vp.load and vp.store for pwr9"),
+ cl::init(false), cl::Hidden);
+
static cl::opt<bool> VecMaskCost("ppc-vec-mask-cost",
cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden);
@@ -1031,3 +1035,39 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
return TLI->supportsTailCallFor(CB);
}
+
+TargetTransformInfo::VPLegalization // Reports vp.load / vp.store as Legal when every check below passes; all other cases get the base-class strategy.
+PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
+ using VPLegalization = TargetTransformInfo::VPLegalization;
+ unsigned Directive = ST->getCPUDirective();
+ VPLegalization DefaultLegalization = BaseT::getVPLegalizationStrategy(PI); // Fallback returned by every early exit.
+ if (Directive != PPC::DIR_PWR10 && Directive != PPC::DIR_PWR_FUTURE && // Only DIR_PWR10 and DIR_PWR_FUTURE continue, plus DIR_PWR9 when Pwr9EVL is set.
+ (!Pwr9EVL || Directive != PPC::DIR_PWR9))
+ return DefaultLegalization;
+
+ if (!ST->isPPC64()) // Only subtargets reporting isPPC64() continue.
+ return DefaultLegalization;
+
+ unsigned IID = PI.getIntrinsicID();
+ if (IID != Intrinsic::vp_load && IID != Intrinsic::vp_store) // No other VP intrinsic is handled here.
+ return DefaultLegalization;
+
+ bool IsLoad = IID == Intrinsic::vp_load;
+ Type *VecTy = IsLoad ? PI.getType() : PI.getOperand(0)->getType(); // The vector type is the result for vp.load and operand 0 for vp.store.
+ EVT VT = TLI->getValueType(DL, VecTy, true);
+ if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 && // Only these four integer vector types are accepted.
+ VT != MVT::v16i8)
+ return DefaultLegalization;
+
+ auto IsAllTrueMask = [](Value *MaskVal) { // True only when the mask is a splat of a Constant whose value is all-ones.
+ if (Value *SplattedVal = getSplatValue(MaskVal))
+ if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
+ return ConstValue->isAllOnesValue();
+ return false;
+ };
+ unsigned MaskIx = IsLoad ? 1 : 2; // The mask is read from operand 1 of vp.load and operand 2 of vp.store.
+ if (!IsAllTrueMask(PI.getOperand(MaskIx)))
+ return DefaultLegalization;
+
+ return VPLegalization(VPLegalization::Legal, VPLegalization::Legal); // Both components of the strategy are reported as Legal.
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 475472ac3720f..385ad89876b93 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -150,6 +150,9 @@ class PPCTTIImpl final : public BasicTTIImplBase<PPCTTIImpl> {
const ArrayRef<Type *> &Types) const override;
bool supportsTailCallFor(const CallBase *CB) const override;
+ TargetTransformInfo::VPLegalization
+ getVPLegalizationStrategy(const VPIntrinsic &PI) const override;
+
private:
// The following constant is used for estimating costs on power9.
static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
diff --git a/llvm/test/CodeGen/PowerPC/vp-ld-st.ll b/llvm/test/CodeGen/PowerPC/vp-ld-st.ll
new file mode 100644
index 0000000000000..f0f9943e901ec
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vp-ld-st.ll
@@ -0,0 +1,160 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 \
+; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=future \
+; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -check-prefix=FUTURE %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 \
+; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=future \
+; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck --check-prefix=FUTURE %s
+
+; Function Attrs: nounwind readnone
+define void @stxvl1(<16 x i8> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 3, 6, 56
+; CHECK-NEXT: stxvl 34, 5, 3
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: stxvl1:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: stxvrl 34, 5, 6
+; FUTURE-NEXT: blr
+entry:
+ %cconv = trunc i64 %c to i32
+ tail call void @llvm.vp.store.v16i8.p0(<16 x i8> %a, ptr %b, <16 x i1> splat (i1 true), i32 %cconv)
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+define void @stxvl2(<8 x i16> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 3, 6, 57
+; CHECK-NEXT: stxvl 34, 5, 3
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: stxvl2:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 3, 6, 1
+; FUTURE-NEXT: stxvrl 34, 5, 3
+; FUTURE-NEXT: blr
+entry:
+ %cconv = trunc i64 %c to i32
+ tail call void @llvm.vp.store.v8i16.p0(<8 x i16> %a, ptr %b, <8 x i1> splat (i1 true), i32 %cconv)
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+define void @stxvl4(<4 x i32> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 3, 6, 58
+; CHECK-NEXT: stxvl 34, 5, 3
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: stxvl4:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 3, 6, 2
+; FUTURE-NEXT: stxvrl 34, 5, 3
+; FUTURE-NEXT: blr
+entry:
+ %cconv = trunc i64 %c to i32
+ tail call void @llvm.vp.store.v4i32.p0(<4 x i32> %a, ptr %b, <4 x i1> splat (i1 true), i32 %cconv)
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+define void @stxvl8(<2 x i64> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 3, 6, 59
+; CHECK-NEXT: stxvl 34, 5, 3
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: stxvl8:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 3, 6, 3
+; FUTURE-NEXT: stxvrl 34, 5, 3
+; FUTURE-NEXT: blr
+entry:
+ %cconv = trunc i64 %c to i32
+ tail call void @llvm.vp.store.v2i64.p0(<2 x i64> %a, ptr %b, <2 x i1> splat (i1 true), i32 %cconv)
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+define <16 x i8> @lxvl1(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 4, 4, 56
+; CHECK-NEXT: lxvl 34, 3, 4
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: lxvl1:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: lxvrl 34, 3, 4
+; FUTURE-NEXT: blr
+entry:
+ %bconv = trunc i64 %b to i32
+ %0 = tail call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %a, <16 x i1> splat (i1 true), i32 %bconv)
+ ret <16 x i8> %0
+}
+
+; Function Attrs: nounwind readnone
+define <8 x i16> @lxvl2(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 4, 4, 57
+; CHECK-NEXT: lxvl 34, 3, 4
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: lxvl2:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 4, 4, 1
+; FUTURE-NEXT: lxvrl 34, 3, 4
+; FUTURE-NEXT: blr
+entry:
+ %bconv = trunc i64 %b to i32
+ %0 = tail call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %a, <8 x i1> splat (i1 true), i32 %bconv)
+ ret <8 x i16> %0
+}
+
+; Function Attrs: nounwind readnone
+define <4 x i32> @lxvl4(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 4, 4, 58
+; CHECK-NEXT: lxvl 34, 3, 4
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: lxvl4:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 4, 4, 2
+; FUTURE-NEXT: lxvrl 34, 3, 4
+; FUTURE-NEXT: blr
+entry:
+ %bconv = trunc i64 %b to i32
+ %0 = tail call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %a, <4 x i1> splat (i1 true), i32 %bconv)
+ ret <4 x i32> %0
+}
+
+; Function Attrs: nounwind readnone
+define <2 x i64> @lxvl8(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 4, 4, 59
+; CHECK-NEXT: lxvl 34, 3, 4
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: lxvl8:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 4, 4, 3
+; FUTURE-NEXT: lxvrl 34, 3, 4
+; FUTURE-NEXT: blr
+entry:
+ %bconv = trunc i64 %b to i32
+ %0 = tail call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %a, <2 x i1> splat (i1 true), i32 %bconv)
+ ret <2 x i64> %0
+}
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Map EVL type VP_LOAD/VP_STORE for fixed length vectors to PPC load/store with length.