diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index 6b209ed2d715..a1e1f9b07aaf 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -144,50 +144,6 @@ class TargetTransformInfoImplBase { return TTI::TCC_Expensive; } - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys, const User *U) { - switch (IID) { - default: - // Intrinsics rarely (if ever) have normal argument setup constraints. - // Model them as having a basic instruction cost. - return TTI::TCC_Basic; - - // TODO: other libc intrinsics. - case Intrinsic::memcpy: - return getMemcpyCost(dyn_cast(U)); - - case Intrinsic::annotation: - case Intrinsic::assume: - case Intrinsic::sideeffect: - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::dbg_label: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::launder_invariant_group: - case Intrinsic::strip_invariant_group: - case Intrinsic::is_constant: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::objectsize: - case Intrinsic::ptr_annotation: - case Intrinsic::var_annotation: - case Intrinsic::experimental_gc_result: - case Intrinsic::experimental_gc_relocate: - case Intrinsic::coro_alloc: - case Intrinsic::coro_begin: - case Intrinsic::coro_free: - case Intrinsic::coro_end: - case Intrinsic::coro_frame: - case Intrinsic::coro_size: - case Intrinsic::coro_suspend: - case Intrinsic::coro_param: - case Intrinsic::coro_subfn_addr: - // These intrinsics don't actually represent code after lowering. - return TTI::TCC_Free; - } - } - bool hasBranchDivergence() { return false; } bool isSourceOfDivergence(const Value *V) { return false; } @@ -786,7 +742,49 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { return TTI::TCC_Basic; } - using BaseT::getIntrinsicCost; + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef ParamTys, const User *U) { + switch (IID) { + default: + // Intrinsics rarely (if ever) have normal argument setup constraints. + // Model them as having a basic instruction cost. + return TTI::TCC_Basic; + + // TODO: other libc intrinsics. + case Intrinsic::memcpy: + return static_cast(this)->getMemcpyCost(dyn_cast(U)); + + case Intrinsic::annotation: + case Intrinsic::assume: + case Intrinsic::sideeffect: + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::dbg_label: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + case Intrinsic::is_constant: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + case Intrinsic::experimental_gc_result: + case Intrinsic::experimental_gc_relocate: + case Intrinsic::coro_alloc: + case Intrinsic::coro_begin: + case Intrinsic::coro_free: + case Intrinsic::coro_end: + case Intrinsic::coro_frame: + case Intrinsic::coro_size: + case Intrinsic::coro_suspend: + case Intrinsic::coro_param: + case Intrinsic::coro_subfn_addr: + // These intrinsics don't actually represent code after lowering. + return TTI::TCC_Free; + } + } unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments, const User *U) { diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp index ea68faa7fc7f..5630a01824dc 100644 --- a/lib/Analysis/MemorySSA.cpp +++ b/lib/Analysis/MemorySSA.cpp @@ -665,7 +665,7 @@ template class ClobberWalker { struct generic_def_path_iterator : public iterator_facade_base, std::forward_iterator_tag, T *> { - generic_def_path_iterator() = default; + generic_def_path_iterator() {} generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {} T &operator*() const { return curNode(); } diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 66d3a281d42d..88f0630f014f 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -486,13 +486,17 @@ getSchedRegions(MachineBasicBlock *MBB, MachineInstr &MI = *std::prev(I); if (isSchedBoundary(&MI, &*MBB, MF, TII)) break; - if (!MI.isDebugInstr()) + if (!MI.isDebugInstr()) { // MBB::size() uses instr_iterator to count. Here we need a bundle to // count as a single instruction. ++NumRegionInstrs; + } } - Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs)); + // It's possible we found a scheduling region that only has debug + // instructions. Don't bother scheduling these. + if (NumRegionInstrs != 0) + Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs)); } if (RegionsTopDown) diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index 2b89f9d16fae..1ebc820a8b49 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -141,15 +141,25 @@ void IntelJITEventListener::notifyObjectLoaded( uint64_t Addr = *AddrOrErr; uint64_t Size = P.second; + auto SecOrErr = Sym.getSection(); + if (!SecOrErr) { + // TODO: Actually report errors helpfully. + consumeError(SecOrErr.takeError()); + continue; + } + object::section_iterator Sec = *SecOrErr; + if (Sec == Obj.section_end()) + continue; + uint64_t Index = Sec->getIndex(); + // Record this address in a local vector Functions.push_back((void*)Addr); // Build the function loaded notification message iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(*Wrapper, Name->data(), Addr, Size); - // TODO: it is neccessary to set proper SectionIndex here. - // object::SectionedAddress::UndefSection works for only absolute addresses. - DILineInfoTable Lines = Context->getLineInfoForAddressRange({Addr, object::SectionedAddress::UndefSection}, Size); + DILineInfoTable Lines = + Context->getLineInfoForAddressRange({Addr, Index}, Size); DILineInfoTable::iterator Begin = Lines.begin(); DILineInfoTable::iterator End = Lines.end(); for (DILineInfoTable::iterator It = Begin; It != End; ++It) { diff --git a/lib/ObjectYAML/MinidumpYAML.cpp b/lib/ObjectYAML/MinidumpYAML.cpp index e578e0591f90..bd017c82fb4d 100644 --- a/lib/ObjectYAML/MinidumpYAML.cpp +++ b/lib/ObjectYAML/MinidumpYAML.cpp @@ -17,7 +17,7 @@ class BlobAllocator { public: size_t tell() const { return NextOffset; } - size_t AllocateCallback(size_t Size, + size_t allocateCallback(size_t Size, std::function Callback) { size_t Offset = NextOffset; NextOffset += Size; @@ -25,18 +25,18 @@ class BlobAllocator { return Offset; } - size_t AllocateBytes(ArrayRef Data) { - return AllocateCallback( + size_t allocateBytes(ArrayRef Data) { + return allocateCallback( Data.size(), [Data](raw_ostream &OS) { OS << toStringRef(Data); }); } - template size_t AllocateArray(ArrayRef Data) { - return AllocateBytes({reinterpret_cast(Data.data()), + template size_t allocateArray(ArrayRef Data) { + return allocateBytes({reinterpret_cast(Data.data()), sizeof(T) * Data.size()}); } - template size_t AllocateObject(const T &Data) { - return AllocateArray(makeArrayRef(Data)); + template size_t allocateObject(const T &Data) { + return allocateArray(makeArrayRef(Data)); } void writeTo(raw_ostream &OS) const; @@ -340,7 +340,7 @@ static Directory layout(BlobAllocator &File, Stream &S) { switch (S.Kind) { case Stream::StreamKind::RawContent: { RawContentStream &Raw = cast(S); - File.AllocateCallback(Raw.Size, [&Raw](raw_ostream &OS) { + File.allocateCallback(Raw.Size, [&Raw](raw_ostream &OS) { Raw.Content.writeAsBinary(OS); assert(Raw.Content.binary_size() <= Raw.Size); OS << std::string(Raw.Size - Raw.Content.binary_size(), '\0'); @@ -348,10 +348,10 @@ static Directory layout(BlobAllocator &File, Stream &S) { break; } case Stream::StreamKind::SystemInfo: - File.AllocateObject(cast(S).Info); + File.allocateObject(cast(S).Info); break; case Stream::StreamKind::TextContent: - File.AllocateArray(arrayRefFromStringRef(cast(S).Text)); + File.allocateArray(arrayRefFromStringRef(cast(S).Text)); break; } Result.Location.DataSize = File.tell() - Result.Location.RVA; @@ -360,11 +360,11 @@ static Directory layout(BlobAllocator &File, Stream &S) { void MinidumpYAML::writeAsBinary(Object &Obj, raw_ostream &OS) { BlobAllocator File; - File.AllocateObject(Obj.Header); + File.allocateObject(Obj.Header); std::vector StreamDirectory(Obj.Streams.size()); Obj.Header.StreamDirectoryRVA = - File.AllocateArray(makeArrayRef(StreamDirectory)); + File.allocateArray(makeArrayRef(StreamDirectory)); Obj.Header.NumberOfStreams = StreamDirectory.size(); for (auto &Stream : enumerate(Obj.Streams)) diff --git a/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/lib/Target/AMDGPU/SIWholeQuadMode.cpp index dae7d455d826..03c0353390f0 100644 --- a/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -200,6 +200,8 @@ class SIWholeQuadMode : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index 82817d3b5ad1..5ab211b791e7 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -129,14 +129,13 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { // We're keeping these builders around because we'll want to add support for // floating point to them. - auto &LoadStoreBuilder = - getActionDefinitionsBuilder({G_LOAD, G_STORE}) - .legalForTypesWithMemDesc({ - {s1, p0, 8, 8}, - {s8, p0, 8, 8}, - {s16, p0, 16, 8}, - {s32, p0, 32, 8}, - {p0, p0, 32, 8}}); + auto &LoadStoreBuilder = getActionDefinitionsBuilder({G_LOAD, G_STORE}) + .legalForTypesWithMemDesc({{s1, p0, 8, 8}, + {s8, p0, 8, 8}, + {s16, p0, 16, 8}, + {s32, p0, 32, 8}, + {p0, p0, 32, 8}}) + .unsupportedIfMemSizeNotPow2(); getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0}); getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0}); @@ -155,7 +154,9 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FCONSTANT, G_FNEG}) .legalFor({s32, s64}); - LoadStoreBuilder.legalFor({{s64, p0}}); + LoadStoreBuilder + .legalForTypesWithMemDesc({{s64, p0, 64, 32}}) + .maxScalar(0, s32); PhiBuilder.legalFor({s64}); getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct({s1}, diff --git a/lib/Target/Mips/MipsCallLowering.cpp b/lib/Target/Mips/MipsCallLowering.cpp index fd0f7921ba3a..0cee6e732ec0 100644 --- a/lib/Target/Mips/MipsCallLowering.cpp +++ b/lib/Target/Mips/MipsCallLowering.cpp @@ -23,10 +23,10 @@ using namespace llvm; MipsCallLowering::MipsCallLowering(const MipsTargetLowering &TLI) : CallLowering(&TLI) {} -bool MipsCallLowering::MipsHandler::assign(unsigned VReg, - const CCValAssign &VA) { +bool MipsCallLowering::MipsHandler::assign(unsigned VReg, const CCValAssign &VA, + const EVT &VT) { if (VA.isRegLoc()) { - assignValueToReg(VReg, VA); + assignValueToReg(VReg, VA, VT); } else if (VA.isMemLoc()) { assignValueToAddress(VReg, VA); } else { @@ -37,9 +37,10 @@ bool MipsCallLowering::MipsHandler::assign(unsigned VReg, bool MipsCallLowering::MipsHandler::assignVRegs(ArrayRef VRegs, ArrayRef ArgLocs, - unsigned ArgLocsStartIndex) { + unsigned ArgLocsStartIndex, + const EVT &VT) { for (unsigned i = 0; i < VRegs.size(); ++i) - if (!assign(VRegs[i], ArgLocs[ArgLocsStartIndex + i])) + if (!assign(VRegs[i], ArgLocs[ArgLocsStartIndex + i], VT)) return false; return true; } @@ -71,10 +72,10 @@ bool MipsCallLowering::MipsHandler::handle( for (unsigned i = 0; i < SplitLength; ++i) VRegs.push_back(MRI.createGenericVirtualRegister(LLT{RegisterVT})); - if (!handleSplit(VRegs, ArgLocs, ArgLocsIndex, Args[ArgsIndex].Reg)) + if (!handleSplit(VRegs, ArgLocs, ArgLocsIndex, Args[ArgsIndex].Reg, VT)) return false; } else { - if (!assign(Args[ArgsIndex].Reg, ArgLocs[ArgLocsIndex])) + if (!assign(Args[ArgsIndex].Reg, ArgLocs[ArgLocsIndex], VT)) return false; } } @@ -88,7 +89,8 @@ class IncomingValueHandler : public MipsCallLowering::MipsHandler { : MipsHandler(MIRBuilder, MRI) {} private: - void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) override; + void assignValueToReg(unsigned ValVReg, const CCValAssign &VA, + const EVT &VT) override; unsigned getStackAddress(const CCValAssign &VA, MachineMemOperand *&MMO) override; @@ -97,7 +99,7 @@ class IncomingValueHandler : public MipsCallLowering::MipsHandler { bool handleSplit(SmallVectorImpl &VRegs, ArrayRef ArgLocs, unsigned ArgLocsStartIndex, - unsigned ArgsReg) override; + unsigned ArgsReg, const EVT &VT) override; virtual void markPhysRegUsed(unsigned PhysReg) { MIRBuilder.getMBB().addLiveIn(PhysReg); @@ -127,21 +129,47 @@ class CallReturnHandler : public IncomingValueHandler { } // end anonymous namespace void IncomingValueHandler::assignValueToReg(unsigned ValVReg, - const CCValAssign &VA) { + const CCValAssign &VA, + const EVT &VT) { + const MipsSubtarget &STI = + static_cast(MIRBuilder.getMF().getSubtarget()); unsigned PhysReg = VA.getLocReg(); - switch (VA.getLocInfo()) { - case CCValAssign::LocInfo::SExt: - case CCValAssign::LocInfo::ZExt: - case CCValAssign::LocInfo::AExt: { - auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg); - MIRBuilder.buildTrunc(ValVReg, Copy); - break; - } - default: - MIRBuilder.buildCopy(ValVReg, PhysReg); - break; + if (VT == MVT::f64 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) { + const MipsSubtarget &STI = + static_cast(MIRBuilder.getMF().getSubtarget()); + + MIRBuilder + .buildInstr(STI.isFP64bit() ? Mips::BuildPairF64_64 + : Mips::BuildPairF64) + .addDef(ValVReg) + .addUse(PhysReg + (STI.isLittle() ? 0 : 1)) + .addUse(PhysReg + (STI.isLittle() ? 1 : 0)) + .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), + *STI.getRegBankInfo()); + markPhysRegUsed(PhysReg); + markPhysRegUsed(PhysReg + 1); + } else if (VT == MVT::f32 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) { + MIRBuilder.buildInstr(Mips::MTC1) + .addDef(ValVReg) + .addUse(PhysReg) + .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), + *STI.getRegBankInfo()); + markPhysRegUsed(PhysReg); + } else { + switch (VA.getLocInfo()) { + case CCValAssign::LocInfo::SExt: + case CCValAssign::LocInfo::ZExt: + case CCValAssign::LocInfo::AExt: { + auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg); + MIRBuilder.buildTrunc(ValVReg, Copy); + break; + } + default: + MIRBuilder.buildCopy(ValVReg, PhysReg); + break; + } + markPhysRegUsed(PhysReg); } - markPhysRegUsed(PhysReg); } unsigned IncomingValueHandler::getStackAddress(const CCValAssign &VA, @@ -180,8 +208,8 @@ void IncomingValueHandler::assignValueToAddress(unsigned ValVReg, bool IncomingValueHandler::handleSplit(SmallVectorImpl &VRegs, ArrayRef ArgLocs, unsigned ArgLocsStartIndex, - unsigned ArgsReg) { - if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex)) + unsigned ArgsReg, const EVT &VT) { + if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex, VT)) return false; setLeastSignificantFirst(VRegs); MIRBuilder.buildMerge(ArgsReg, VRegs); @@ -196,7 +224,8 @@ class OutgoingValueHandler : public MipsCallLowering::MipsHandler { : MipsHandler(MIRBuilder, MRI), MIB(MIB) {} private: - void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) override; + void assignValueToReg(unsigned ValVReg, const CCValAssign &VA, + const EVT &VT) override; unsigned getStackAddress(const CCValAssign &VA, MachineMemOperand *&MMO) override; @@ -205,7 +234,7 @@ class OutgoingValueHandler : public MipsCallLowering::MipsHandler { bool handleSplit(SmallVectorImpl &VRegs, ArrayRef ArgLocs, unsigned ArgLocsStartIndex, - unsigned ArgsReg) override; + unsigned ArgsReg, const EVT &VT) override; unsigned extendRegister(unsigned ValReg, const CCValAssign &VA); @@ -214,11 +243,40 @@ class OutgoingValueHandler : public MipsCallLowering::MipsHandler { } // end anonymous namespace void OutgoingValueHandler::assignValueToReg(unsigned ValVReg, - const CCValAssign &VA) { + const CCValAssign &VA, + const EVT &VT) { unsigned PhysReg = VA.getLocReg(); - unsigned ExtReg = extendRegister(ValVReg, VA); - MIRBuilder.buildCopy(PhysReg, ExtReg); - MIB.addUse(PhysReg, RegState::Implicit); + const MipsSubtarget &STI = + static_cast(MIRBuilder.getMF().getSubtarget()); + + if (VT == MVT::f64 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) { + MIRBuilder + .buildInstr(STI.isFP64bit() ? Mips::ExtractElementF64_64 + : Mips::ExtractElementF64) + .addDef(PhysReg + (STI.isLittle() ? 1 : 0)) + .addUse(ValVReg) + .addImm(1) + .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), + *STI.getRegBankInfo()); + MIRBuilder + .buildInstr(STI.isFP64bit() ? Mips::ExtractElementF64_64 + : Mips::ExtractElementF64) + .addDef(PhysReg + (STI.isLittle() ? 0 : 1)) + .addUse(ValVReg) + .addImm(0) + .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), + *STI.getRegBankInfo()); + } else if (VT == MVT::f32 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) { + MIRBuilder.buildInstr(Mips::MFC1) + .addDef(PhysReg) + .addUse(ValVReg) + .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), + *STI.getRegBankInfo()); + } else { + unsigned ExtReg = extendRegister(ValVReg, VA); + MIRBuilder.buildCopy(PhysReg, ExtReg); + MIB.addUse(PhysReg, RegState::Implicit); + } } unsigned OutgoingValueHandler::getStackAddress(const CCValAssign &VA, @@ -286,10 +344,10 @@ unsigned OutgoingValueHandler::extendRegister(unsigned ValReg, bool OutgoingValueHandler::handleSplit(SmallVectorImpl &VRegs, ArrayRef ArgLocs, unsigned ArgLocsStartIndex, - unsigned ArgsReg) { + unsigned ArgsReg, const EVT &VT) { MIRBuilder.buildUnmerge(VRegs, ArgsReg); setLeastSignificantFirst(VRegs); - if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex)) + if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex, VT)) return false; return true; @@ -300,6 +358,8 @@ static bool isSupportedType(Type *T) { return true; if (T->isPointerTy()) return true; + if (T->isFloatingPointTy()) + return true; return false; } diff --git a/lib/Target/Mips/MipsCallLowering.h b/lib/Target/Mips/MipsCallLowering.h index dc04d83733e7..05c703b60bd6 100644 --- a/lib/Target/Mips/MipsCallLowering.h +++ b/lib/Target/Mips/MipsCallLowering.h @@ -35,7 +35,7 @@ class MipsCallLowering : public CallLowering { protected: bool assignVRegs(ArrayRef VRegs, ArrayRef ArgLocs, - unsigned Index); + unsigned ArgLocsStartIndex, const EVT &VT); void setLeastSignificantFirst(SmallVectorImpl &VRegs); @@ -43,19 +43,21 @@ class MipsCallLowering : public CallLowering { MachineRegisterInfo &MRI; private: - bool assign(unsigned VReg, const CCValAssign &VA); + bool assign(unsigned VReg, const CCValAssign &VA, const EVT &VT); virtual unsigned getStackAddress(const CCValAssign &VA, MachineMemOperand *&MMO) = 0; - virtual void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) = 0; + virtual void assignValueToReg(unsigned ValVReg, const CCValAssign &VA, + const EVT &VT) = 0; virtual void assignValueToAddress(unsigned ValVReg, const CCValAssign &VA) = 0; virtual bool handleSplit(SmallVectorImpl &VRegs, ArrayRef ArgLocs, - unsigned ArgLocsStartIndex, unsigned ArgsReg) = 0; + unsigned ArgLocsStartIndex, unsigned ArgsReg, + const EVT &VT) = 0; }; MipsCallLowering(const MipsTargetLowering &TLI); diff --git a/lib/Target/Mips/MipsInstructionSelector.cpp b/lib/Target/Mips/MipsInstructionSelector.cpp index 36aea2983591..ded8c1c1fbc0 100644 --- a/lib/Target/Mips/MipsInstructionSelector.cpp +++ b/lib/Target/Mips/MipsInstructionSelector.cpp @@ -38,6 +38,7 @@ class MipsInstructionSelector : public InstructionSelector { bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; bool materialize32BitImm(unsigned DestReg, APInt Imm, MachineIRBuilder &B) const; + bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const; const MipsTargetMachine &TM; const MipsSubtarget &STI; @@ -75,15 +76,24 @@ MipsInstructionSelector::MipsInstructionSelector( { } -static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, - MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) { +bool MipsInstructionSelector::selectCopy(MachineInstr &I, + MachineRegisterInfo &MRI) const { unsigned DstReg = I.getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(DstReg)) return true; - const TargetRegisterClass *RC = &Mips::GPR32RegClass; + const RegisterBank *RegBank = RBI.getRegBank(DstReg, MRI, TRI); + const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); + const TargetRegisterClass *RC = &Mips::GPR32RegClass; + if (RegBank->getID() == Mips::FPRBRegBankID) { + if (DstSize == 32) + RC = &Mips::FGR32RegClass; + else if (DstSize == 64) + RC = STI.isFP64bit() ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass; + else + llvm_unreachable("Unsupported destination size"); + } if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) { LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) << " operand\n"); @@ -162,7 +172,7 @@ bool MipsInstructionSelector::select(MachineInstr &I, if (!isPreISelGenericOpcode(I.getOpcode())) { if (I.isCopy()) - return selectCopy(I, TII, MRI, TRI, RBI); + return selectCopy(I, MRI); return true; } diff --git a/lib/Target/Mips/MipsRegisterBankInfo.cpp b/lib/Target/Mips/MipsRegisterBankInfo.cpp index 214dd106869e..08c33a4119ce 100644 --- a/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ b/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -62,6 +62,11 @@ const RegisterBank &MipsRegisterBankInfo::getRegBankFromRegClass( case Mips::GPRMM16MovePPairFirst_and_GPRMM16MovePPairSecondRegClassID: case Mips::SP32RegClassID: return getRegBank(Mips::GPRBRegBankID); + case Mips::FGRCCRegClassID: + case Mips::FGR64RegClassID: + case Mips::AFGR64RegClassID: + case Mips::AFGR64_and_OddSPRegClassID: + return getRegBank(Mips::FPRBRegBankID); default: llvm_unreachable("Register class not supported"); } diff --git a/lib/Target/Mips/MipsRegisterBanks.td b/lib/Target/Mips/MipsRegisterBanks.td index b591841dfef2..14a0181f8f11 100644 --- a/lib/Target/Mips/MipsRegisterBanks.td +++ b/lib/Target/Mips/MipsRegisterBanks.td @@ -10,3 +10,5 @@ //===----------------------------------------------------------------------===// def GPRBRegBank : RegisterBank<"GPRB", [GPR32]>; + +def FPRBRegBank : RegisterBank<"FPRB", [FGR64, AFGR64]>; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index bfa0df7db347..e08914c536e3 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3167,14 +3167,14 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) { SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) }; - SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other); + SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); // Update the chain. - ReplaceUses(Input.getValue(1), SDValue(NewNode, 1)); + ReplaceUses(Input.getValue(1), SDValue(NewNode, 2)); // Record the mem-refs CurDAG->setNodeMemRefs(NewNode, {cast(Input)->getMemOperand()}); } else { - NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, Input, New); + NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, New); } return NewNode; @@ -3561,13 +3561,15 @@ void X86DAGToDAGISel::Select(SDNode *Node) { } // Emit the smaller op and the shift. - SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT); - SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst); + // Even though we shrink the constant, the VT should match the operation VT. + SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, NVT); + SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, MVT::i32, + N0->getOperand(0), NewCst); if (ShlVal == 1) - CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0), + CurDAG->SelectNodeTo(Node, AddOp, NVT, MVT::i32, SDValue(New, 0), SDValue(New, 0)); else - CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0), + CurDAG->SelectNodeTo(Node, ShlOp, NVT, MVT::i32, SDValue(New, 0), getI8Imm(ShlVal, dl)); return; } @@ -3968,7 +3970,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { unsigned TrailingZeros = countTrailingZeros(Mask); SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64); SDValue Shift = - SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, + SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32, N0.getOperand(0), Imm), 0); MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl, MVT::i32, Shift, Shift); @@ -3979,7 +3981,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { unsigned LeadingZeros = countLeadingZeros(Mask); SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64); SDValue Shift = - SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, + SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32, N0.getOperand(0), Imm), 0); MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl, MVT::i32, Shift, Shift); diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 4c06b1765432..7aab8f8f377f 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -19,11 +19,6 @@ def GetLo32XForm : SDNodeXFormgetZExtValue(), SDLoc(N)); }]>; -def GetLo8XForm : SDNodeXFormgetZExtValue(), SDLoc(N)); -}]>; - //===----------------------------------------------------------------------===// // Random Pseudo Instructions. @@ -1523,7 +1518,7 @@ def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm), (i64 0), (AND32ri8 (EXTRACT_SUBREG GR64:$src, sub_32bit), - (i32 (GetLo8XForm imm:$imm))), + (i32 (GetLo32XForm imm:$imm))), sub_32bit)>; def : Pat<(and GR64:$src, i64immZExt32:$imm), diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 99252fc2a7aa..2aa5fa45ce92 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -931,17 +931,6 @@ def fp64imm0 : PatLeaf<(f64 fpimm), [{ return N->isExactlyValue(+0.0); }]>; -def I8Imm : SDNodeXFormgetZExtValue(), SDLoc(N)); -}]>; - -// BYTE_imm - Transform bit immediates into byte immediates. -def BYTE_imm : SDNodeXForm> 3 - return getI32Imm(N->getZExtValue() >> 3, SDLoc(N)); -}]>; - // EXTRACT_get_vextract128_imm xform function: convert extract_subvector index // to VEXTRACTF128/VEXTRACTI128 imm. def EXTRACT_get_vextract128_imm : SDNodeXForm(VL[0]); - Value *VLeft = I->getOperand(0); - Value *VRight = I->getOperand(1); - if (!isa(VRight) && isa(VLeft)) - // Favor having instruction to the right. FIXME: why? - std::swap(VLeft, VRight); - Left.push_back(VLeft); - Right.push_back(VRight); + Left.push_back(I->getOperand(0)); + Right.push_back(I->getOperand(1)); } // Keep track if we have instructions with all the same opcode on one side. diff --git a/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir b/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir new file mode 100644 index 000000000000..d60abaf7c27b --- /dev/null +++ b/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir @@ -0,0 +1,115 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s + +# The sequence of DBG_VALUEs forms a scheduling region with 0 real +# instructions. The RegPressure tracker would end up skipping over any +# debug instructions, so it would point to the instruction +# before/outside of the region, hitting this assert: +# assert((BotRPTracker.getPos() == RegionEnd || +# (RegionEnd->isDebugInstr() && +# BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) && +# "Can't find the region bottom"); + +--- +name: only_dbg_value_sched_region +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + waveLimiter: true +body: | + ; CHECK-LABEL: name: only_dbg_value_sched_region + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, 0, implicit $exec + ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, 0, implicit $exec + ; CHECK: undef %4.sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec + ; CHECK: %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec + ; CHECK: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK: undef %11.sub1:vreg_64 = IMPLICIT_DEF + ; CHECK: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; CHECK: undef %6.sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $exec + ; CHECK: dead undef %6.sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $exec + ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, 0, implicit $exec + ; CHECK: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK: undef %19.sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $exec + ; CHECK: %19.sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $exec + ; CHECK: GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, implicit $exec + ; CHECK: %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, 0, implicit $exec + ; CHECK: [[DEF2]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, 0, implicit $exec + ; CHECK: dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, implicit $exec + ; CHECK: dead %21:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, 0, implicit $exec + ; CHECK: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 2, [[DEF2]], implicit $exec + ; CHECK: dead %22:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, 0, implicit $exec + ; CHECK: S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_]] + ; CHECK: GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_1]], 0, 0, 0, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: S_SETREG_IMM32_B32 0, 1 + ; CHECK: DBG_VALUE + ; CHECK: DBG_VALUE + ; CHECK: DBG_VALUE + ; CHECK: S_SETREG_IMM32_B32 0, 1 + ; CHECK: bb.2: + ; CHECK: S_NOP 0, implicit [[COPY]] + ; CHECK: S_NOP 0, implicit [[DEF8]] + ; CHECK: S_ENDPGM 0 + bb.0: + liveins: $vgpr0 + + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64 = IMPLICIT_DEF + %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, 0, implicit $exec + %3:vgpr_32 = GLOBAL_LOAD_DWORD %1, 8, 0, 0, implicit $exec + undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0, %0, implicit $exec + %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec + %5:vreg_64 = COPY %2 + undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0, %5.sub0, implicit $exec + %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1, %5.sub0, implicit $exec + %7:vgpr_32 = GLOBAL_LOAD_DWORD %5, 0, 0, 0, implicit $exec + %8:vreg_64 = IMPLICIT_DEF + %9:vreg_64 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + undef %11.sub1:vreg_64 = IMPLICIT_DEF + %12:vgpr_32 = IMPLICIT_DEF + %13:vgpr_32 = IMPLICIT_DEF + %14:vreg_64 = IMPLICIT_DEF + %15:vreg_64 = IMPLICIT_DEF + %16:vgpr_32 = IMPLICIT_DEF + %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7, %2.sub0, implicit $exec + %19.sub1:vreg_64 = V_ADD_F32_e32 %3, %3, implicit $exec + GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, implicit $exec + %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9, 0, 0, 0, implicit $exec + %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10, 0, 0, 0, implicit $exec + %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, implicit $exec + %21:vgpr_32 = GLOBAL_LOAD_DWORD %14, 0, 0, 0, implicit $exec + %22:vgpr_32 = GLOBAL_LOAD_DWORD %15, 0, 0, 0, implicit $exec + %23:vreg_64 = V_LSHLREV_B64 2, %8, implicit $exec + S_NOP 0, implicit %13, implicit %23.sub0, implicit %12, implicit %17 + GLOBAL_STORE_DWORD %15, %18, 0, 0, 0, implicit $exec + + bb.1: + S_SETREG_IMM32_B32 0, 1 + DBG_VALUE + DBG_VALUE + DBG_VALUE + S_SETREG_IMM32_B32 0, 1 + + bb.2: + S_NOP 0, implicit %0 + S_NOP 0, implicit %16 + S_ENDPGM 0 + +... diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir b/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir index 34ed8b843075..a7d83d549b8a 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir @@ -1,10 +1,16 @@ # RUN: llc -mtriple arm-- -run-pass=legalizer %s -o - | FileCheck %s -# RUN: llc -mtriple thumb-- -mattr=+v6t2 -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple thumbv7-- -run-pass=legalizer %s -o - | FileCheck %s --- | define void @test_legal_loads_stores() { ret void } define void @test_load_from_stack() { ret void } + define void @test_load_store_64_vfp() #0 { ret void } + define void @test_load_store_64_novfp() #1 { ret void } + define void @test_gep() { ret void } + + attributes #0 = { "target-features"="+vfp2" } + attributes #1 = { "target-features"="-vfp2" } ... --- name: test_legal_loads_stores @@ -81,6 +87,88 @@ body: | BX_RET 14, $noreg, implicit $r0 ... --- +name: test_load_store_64_vfp +# CHECK-LABEL: name: test_load_store_64_vfp +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: $r0 + + ; Can't use the VFP support for unaligned operations, we need to use 32-bits + ; operations instead. + ; CHECK: [[ADDR1:%[0-9]+]]:_(p0) = COPY $r0 + ; CHECK-NEXT: [[V1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load 4, align 1) + ; CHECK-NEXT: [[OFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[OFFCOPY:%[0-9]+]]:_(s32) = COPY [[OFF]] + ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFFCOPY]] + ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[ADDR2]](p0) :: (load 4, align 1) + ; CHECK-NEXT: G_STORE [[V1]](s32), [[ADDR1]](p0) :: (store 4, align 1) + ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFF]] + ; CHECK-NEXT: G_STORE [[V2]](s32), [[ADDR2]](p0) :: (store 4, align 1) + %0(p0) = COPY $r0 + %1(s64) = G_LOAD %0(p0) :: (load 8, align 1) + G_STORE %1(s64), %0(p0) :: (store 8, align 1) + + ; For word-aligned we can use VFP operations. + ; CHECK: [[V:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8, align 4) + ; CHECK: G_STORE [[V]](s64), %0(p0) :: (store 8, align 4) + %2(s64) = G_LOAD %0(p0) :: (load 8, align 4) + G_STORE %2(s64), %0(p0) :: (store 8, align 4) + + BX_RET 14, $noreg +... +--- +name: test_load_store_64_novfp +# CHECK-LABEL: name: test_load_store_64_novfp +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: $r0 + + ; When we don't have VFP support, we need to use 32-bit operations. + ; CHECK: [[ADDR1:%[0-9]+]]:_(p0) = COPY $r0 + ; CHECK-NEXT: [[V1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load 4, align 1) + ; CHECK-NEXT: [[OFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[OFFCOPY:%[0-9]+]]:_(s32) = COPY [[OFF]] + ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFFCOPY]] + ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[ADDR2]](p0) :: (load 4, align 1) + ; CHECK-NEXT: G_STORE [[V1]](s32), [[ADDR1]](p0) :: (store 4, align 1) + ; CHECK-NEXT: [[OFFCOPY:%[0-9]+]]:_(s32) = COPY [[OFF]] + ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFFCOPY]] + ; CHECK-NEXT: G_STORE [[V2]](s32), [[ADDR2]](p0) :: (store 4, align 1) + %0(p0) = COPY $r0 + %1(s64) = G_LOAD %0(p0) :: (load 8, align 1) + G_STORE %1(s64), %0(p0) :: (store 8, align 1) + + ; CHECK: [[V1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load 4) + ; CHECK-NEXT: [[OFFCOPY:%[0-9]+]]:_(s32) = COPY [[OFF]] + ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFFCOPY]] + ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[ADDR2]](p0) :: (load 4) + ; CHECK-NEXT: G_STORE [[V1]](s32), [[ADDR1]](p0) :: (store 4) + ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFF]] + ; CHECK-NEXT: G_STORE [[V2]](s32), [[ADDR2]](p0) :: (store 4) + %2(s64) = G_LOAD %0(p0) :: (load 8, align 4) + G_STORE %2(s64), %0(p0) :: (store 8, align 4) + + BX_RET 14, $noreg +... +--- name: test_gep # CHECK-LABEL: name: test_gep legalized: false diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir index ae4e94904ec9..f4408adce960 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir @@ -1,7 +1,5 @@ # RUN: llc -mtriple arm-- -run-pass=legalizer %s -o - | FileCheck %s --- | - define void @test_load_store_64() #0 { ret void } - define void @test_constants_s64() { ret void } define void @test_phi_s64() #0 { ret void } @@ -9,34 +7,6 @@ attributes #0 = { "target-features"="+vfp2" } ... --- -name: test_load_store_64 -# CHECK-LABEL: name: test_load_store_64 -legalized: false -# CHECK: legalized: true -regBankSelected: false -selected: false -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } - - { id: 5, class: _ } - - { id: 6, class: _ } -body: | - bb.0: - liveins: $r0 - - ; These are legal, so we should find them unchanged in the output - ; CHECK-DAG: G_STORE {{%[0-9]+}}(s64), %0(p0) - ; CHECK-DAG: {{%[0-9]+}}:_(s64) = G_LOAD %0(p0) - %0(p0) = COPY $r0 - %1(s64) = G_LOAD %0(p0) :: (load 8) - G_STORE %1(s64), %0(p0) :: (store 8) - BX_RET 14, $noreg -... ---- name: test_constants_s64 # CHECK-LABEL: name: test_constants_s64 legalized: false diff --git a/test/CodeGen/Mips/GlobalISel/instruction-select/float_args.mir b/test/CodeGen/Mips/GlobalISel/instruction-select/float_args.mir new file mode 100644 index 000000000000..a81888ab49b3 --- /dev/null +++ b/test/CodeGen/Mips/GlobalISel/instruction-select/float_args.mir @@ -0,0 +1,303 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP32 +# RUN: llc -O0 -mtriple=mipsel-linux-gnu -mattr=+fp64,+mips32r2 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP64 +--- | + + define void @float_in_fpr() {entry: ret void} + define void @double_in_fpr() {entry: ret void} + define void @float_in_gpr() {entry: ret void} + define void @double_in_gpr() {entry: ret void} + define void @call_float_in_fpr() {entry: ret void} + define void @call_double_in_fpr() {entry: ret void} + define void @call_float_in_gpr() {entry: ret void} + define void @call_double_in_gpr() {entry: ret void} + +... +--- +name: float_in_fpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $f12, $f14 + + ; FP32-LABEL: name: float_in_fpr + ; FP32: liveins: $f12, $f14 + ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f14 + ; FP32: $f0 = COPY [[COPY]] + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: float_in_fpr + ; FP64: liveins: $f12, $f14 + ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f14 + ; FP64: $f0 = COPY [[COPY]] + ; FP64: RetRA implicit $f0 + %1:fprb(s32) = COPY $f14 + $f0 = COPY %1(s32) + RetRA implicit $f0 + +... +--- +name: double_in_fpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d6, $d7 + + ; FP32-LABEL: name: double_in_fpr + ; FP32: liveins: $d6, $d7 + ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d7 + ; FP32: $d0 = COPY [[COPY]] + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: double_in_fpr + ; FP64: liveins: $d6, $d7 + ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d7 + ; FP64: $d0 = COPY [[COPY]] + ; FP64: RetRA implicit $d0 + %1:fprb(s64) = COPY $d7 + $d0 = COPY %1(s64) + RetRA implicit $d0 + +... +--- +name: float_in_gpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; FP32-LABEL: name: float_in_gpr + ; FP32: liveins: $a0, $a1 + ; FP32: [[MTC1_:%[0-9]+]]:fgr32 = MTC1 $a1 + ; FP32: $f0 = COPY [[MTC1_]] + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: float_in_gpr + ; FP64: liveins: $a0, $a1 + ; FP64: [[MTC1_:%[0-9]+]]:fgr32 = MTC1 $a1 + ; FP64: $f0 = COPY [[MTC1_]] + ; FP64: RetRA implicit $f0 + %1:fgr32(s32) = MTC1 $a1 + $f0 = COPY %1(s32) + RetRA implicit $f0 + +... +--- +name: double_in_gpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a2, $a3 + + ; FP32-LABEL: name: double_in_gpr + ; FP32: liveins: $a0, $a2, $a3 + ; FP32: [[BuildPairF64_:%[0-9]+]]:afgr64 = BuildPairF64 $a2, $a3 + ; FP32: $d0 = COPY [[BuildPairF64_]] + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: double_in_gpr + ; FP64: liveins: $a0, $a2, $a3 + ; FP64: [[BuildPairF64_:%[0-9]+]]:afgr64 = BuildPairF64 $a2, $a3 + ; FP64: $d0 = COPY [[BuildPairF64_]] + ; FP64: RetRA implicit $d0 + %1:afgr64(s64) = BuildPairF64 $a2, $a3 + $d0 = COPY %1(s64) + RetRA implicit $d0 + +... +--- +name: call_float_in_fpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $f12, $f14 + + ; FP32-LABEL: name: call_float_in_fpr + ; FP32: liveins: $f12, $f14 + ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12 + ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f12 = COPY [[COPY]] + ; FP32: $f14 = COPY [[COPY1]] + ; FP32: JAL @float_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0 + ; FP32: [[COPY2:%[0-9]+]]:fgr32 = COPY $f0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f0 = COPY [[COPY2]] + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: call_float_in_fpr + ; FP64: liveins: $f12, $f14 + ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12 + ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f12 = COPY [[COPY]] + ; FP64: $f14 = COPY [[COPY1]] + ; FP64: JAL @float_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0 + ; FP64: [[COPY2:%[0-9]+]]:fgr32 = COPY $f0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f0 = COPY [[COPY2]] + ; FP64: RetRA implicit $f0 + %0:fprb(s32) = COPY $f12 + %1:fprb(s32) = COPY $f14 + ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + $f12 = COPY %0(s32) + $f14 = COPY %1(s32) + JAL @float_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0 + %2:fprb(s32) = COPY $f0 + ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + $f0 = COPY %2(s32) + RetRA implicit $f0 + +... +--- +name: call_double_in_fpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d6, $d7 + + ; FP32-LABEL: name: call_double_in_fpr + ; FP32: liveins: $d6, $d7 + ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6 + ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d6 = COPY [[COPY]] + ; FP32: $d7 = COPY [[COPY1]] + ; FP32: JAL @double_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit $d7, implicit-def $d0 + ; FP32: [[COPY2:%[0-9]+]]:afgr64 = COPY $d0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d0 = COPY [[COPY2]] + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: call_double_in_fpr + ; FP64: liveins: $d6, $d7 + ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6 + ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d6 = COPY [[COPY]] + ; FP64: $d7 = COPY [[COPY1]] + ; FP64: JAL @double_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit $d7, implicit-def $d0 + ; FP64: [[COPY2:%[0-9]+]]:fgr64 = COPY $d0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d0 = COPY [[COPY2]] + ; FP64: RetRA implicit $d0 + %0:fprb(s64) = COPY $d6 + %1:fprb(s64) = COPY $d7 + ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + $d6 = COPY %0(s64) + $d7 = COPY %1(s64) + JAL @double_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit $d7, implicit-def $d0 + %2:fprb(s64) = COPY $d0 + ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + $d0 = COPY %2(s64) + RetRA implicit $d0 + +... +--- +name: call_float_in_gpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; FP32-LABEL: name: call_float_in_gpr + ; FP32: liveins: $a0, $a1 + ; FP32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; FP32: [[MTC1_:%[0-9]+]]:fgr32 = MTC1 $a1 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $a0 = COPY [[COPY]] + ; FP32: $a1 = MFC1 [[MTC1_]] + ; FP32: JAL @float_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $f0 + ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f0 = COPY [[COPY1]] + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: call_float_in_gpr + ; FP64: liveins: $a0, $a1 + ; FP64: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; FP64: [[MTC1_:%[0-9]+]]:fgr32 = MTC1 $a1 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $a0 = COPY [[COPY]] + ; FP64: $a1 = MFC1 [[MTC1_]] + ; FP64: JAL @float_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $f0 + ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f0 = COPY [[COPY1]] + ; FP64: RetRA implicit $f0 + %0:gprb(s32) = COPY $a0 + %1:fgr32(s32) = MTC1 $a1 + ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + $a0 = COPY %0(s32) + $a1 = MFC1 %1(s32) + JAL @float_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $f0 + %2:fprb(s32) = COPY $f0 + ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + $f0 = COPY %2(s32) + RetRA implicit $f0 + +... +--- +name: call_double_in_gpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a2, $a3 + + ; FP32-LABEL: name: call_double_in_gpr + ; FP32: liveins: $a0, $a2, $a3 + ; FP32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; FP32: [[BuildPairF64_:%[0-9]+]]:afgr64 = BuildPairF64 $a2, $a3 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $a0 = COPY [[COPY]] + ; FP32: $a3 = ExtractElementF64 [[BuildPairF64_]], 1 + ; FP32: $a2 = ExtractElementF64 [[BuildPairF64_]], 0 + ; FP32: JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0 + ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d0 = COPY [[COPY1]] + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: call_double_in_gpr + ; FP64: liveins: $a0, $a2, $a3 + ; FP64: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; FP64: [[BuildPairF64_:%[0-9]+]]:afgr64 = BuildPairF64 $a2, $a3 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $a0 = COPY [[COPY]] + ; FP64: $a3 = ExtractElementF64 [[BuildPairF64_]], 1 + ; FP64: $a2 = ExtractElementF64 [[BuildPairF64_]], 0 + ; FP64: JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0 + ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d0 = COPY [[COPY1]] + ; FP64: RetRA implicit $d0 + %0:gprb(s32) = COPY $a0 + %1:afgr64(s64) = BuildPairF64 $a2, $a3 + ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + $a0 = COPY %0(s32) + $a3 = ExtractElementF64 %1(s64), 1 + $a2 = ExtractElementF64 %1(s64), 0 + JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0 + %2:fprb(s64) = COPY $d0 + ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + $d0 = COPY %2(s64) + RetRA implicit $d0 + +... + diff --git a/test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll b/test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll new file mode 100644 index 000000000000..24cfcd895a78 --- /dev/null +++ b/test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll @@ -0,0 +1,211 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py + +; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP32 +; RUN: llc -O0 -mtriple=mipsel-linux-gnu -mattr=+fp64,+mips32r2 -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP64 + +define float @float_in_fpr(float %a, float %b) { + ; FP32-LABEL: name: float_in_fpr + ; FP32: bb.1.entry: + ; FP32: liveins: $f12, $f14 + ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP32: [[COPY1:%[0-9]+]]:_(s32) = COPY $f14 + ; FP32: $f0 = COPY [[COPY1]](s32) + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: float_in_fpr + ; FP64: bb.1.entry: + ; FP64: liveins: $f12, $f14 + ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP64: [[COPY1:%[0-9]+]]:_(s32) = COPY $f14 + ; FP64: $f0 = COPY [[COPY1]](s32) + ; FP64: RetRA implicit $f0 +entry: + ret float %b +} + +define double @double_in_fpr(double %a, double %b) { + ; FP32-LABEL: name: double_in_fpr + ; FP32: bb.1.entry: + ; FP32: liveins: $d6, $d7 + ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP32: [[COPY1:%[0-9]+]]:_(s64) = COPY $d7 + ; FP32: $d0 = COPY [[COPY1]](s64) + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: double_in_fpr + ; FP64: bb.1.entry: + ; FP64: liveins: $d12_64, $d14_64 + ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d12_64 + ; FP64: [[COPY1:%[0-9]+]]:_(s64) = COPY $d14_64 + ; FP64: $d0_64 = COPY [[COPY1]](s64) + ; FP64: RetRA implicit $d0_64 +entry: + ret double %b +} + +define float @float_in_gpr(i32 %a, float %b) { + ; FP32-LABEL: name: float_in_gpr + ; FP32: bb.1.entry: + ; FP32: liveins: $a0, $a1 + ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; FP32: [[MTC1_:%[0-9]+]]:fgr32(s32) = MTC1 $a1 + ; FP32: $f0 = COPY [[MTC1_]](s32) + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: float_in_gpr + ; FP64: bb.1.entry: + ; FP64: liveins: $a0, $a1 + ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; FP64: [[MTC1_:%[0-9]+]]:fgr32(s32) = MTC1 $a1 + ; FP64: $f0 = COPY [[MTC1_]](s32) + ; FP64: RetRA implicit $f0 +entry: + ret float %b +} + +define double @double_in_gpr(i32 %a, double %b) { + ; FP32-LABEL: name: double_in_gpr + ; FP32: bb.1.entry: + ; FP32: liveins: $a0, $a2, $a3 + ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; FP32: [[BuildPairF64_:%[0-9]+]]:afgr64(s64) = BuildPairF64 $a2, $a3 + ; FP32: $d0 = COPY [[BuildPairF64_]](s64) + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: double_in_gpr + ; FP64: bb.1.entry: + ; FP64: liveins: $a0, $a2, $a3 + ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; FP64: [[BuildPairF64_64_:%[0-9]+]]:fgr64(s64) = BuildPairF64_64 $a2, $a3 + ; FP64: $d0_64 = COPY [[BuildPairF64_64_]](s64) + ; FP64: RetRA implicit $d0_64 +entry: + ret double %b +} + +define float @call_float_in_fpr(float %a, float %b) { + ; FP32-LABEL: name: call_float_in_fpr + ; FP32: bb.1.entry: + ; FP32: liveins: $f12, $f14 + ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP32: [[COPY1:%[0-9]+]]:_(s32) = COPY $f14 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f12 = COPY [[COPY]](s32) + ; FP32: $f14 = COPY [[COPY1]](s32) + ; FP32: JAL @float_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0 + ; FP32: [[COPY2:%[0-9]+]]:_(s32) = COPY $f0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f0 = COPY [[COPY2]](s32) + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: call_float_in_fpr + ; FP64: bb.1.entry: + ; FP64: liveins: $f12, $f14 + ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP64: [[COPY1:%[0-9]+]]:_(s32) = COPY $f14 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f12 = COPY [[COPY]](s32) + ; FP64: $f14 = COPY [[COPY1]](s32) + ; FP64: JAL @float_in_fpr, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0 + ; FP64: [[COPY2:%[0-9]+]]:_(s32) = COPY $f0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f0 = COPY [[COPY2]](s32) + ; FP64: RetRA implicit $f0 +entry: + %call = call float @float_in_fpr(float %a, float %b) + ret float %call +} + +define double @call_double_in_fpr(double %a, double %b) { + ; FP32-LABEL: name: call_double_in_fpr + ; FP32: bb.1.entry: + ; FP32: liveins: $d6, $d7 + ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP32: [[COPY1:%[0-9]+]]:_(s64) = COPY $d7 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d6 = COPY [[COPY]](s64) + ; FP32: $d7 = COPY [[COPY1]](s64) + ; FP32: JAL @double_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit $d7, implicit-def $d0 + ; FP32: [[COPY2:%[0-9]+]]:_(s64) = COPY $d0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d0 = COPY [[COPY2]](s64) + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: call_double_in_fpr + ; FP64: bb.1.entry: + ; FP64: liveins: $d12_64, $d14_64 + ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d12_64 + ; FP64: [[COPY1:%[0-9]+]]:_(s64) = COPY $d14_64 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d12_64 = COPY [[COPY]](s64) + ; FP64: $d14_64 = COPY [[COPY1]](s64) + ; FP64: JAL @double_in_fpr, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $d12_64, implicit $d14_64, implicit-def $d0_64 + ; FP64: [[COPY2:%[0-9]+]]:_(s64) = COPY $d0_64 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d0_64 = COPY [[COPY2]](s64) + ; FP64: RetRA implicit $d0_64 +entry: + %call = call double @double_in_fpr(double %a, double %b) + ret double %call +} + +define float @call_float_in_gpr(i32 %a, float %b) { + ; FP32-LABEL: name: call_float_in_gpr + ; FP32: bb.1.entry: + ; FP32: liveins: $a0, $a1 + ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; FP32: [[MTC1_:%[0-9]+]]:fgr32(s32) = MTC1 $a1 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $a0 = COPY [[COPY]](s32) + ; FP32: $a1 = MFC1 [[MTC1_]](s32) + ; FP32: JAL @float_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $f0 + ; FP32: [[COPY1:%[0-9]+]]:_(s32) = COPY $f0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f0 = COPY [[COPY1]](s32) + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: call_float_in_gpr + ; FP64: bb.1.entry: + ; FP64: liveins: $a0, $a1 + ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; FP64: [[MTC1_:%[0-9]+]]:fgr32(s32) = MTC1 $a1 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $a0 = COPY [[COPY]](s32) + ; FP64: $a1 = MFC1 [[MTC1_]](s32) + ; FP64: JAL @float_in_gpr, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $f0 + ; FP64: [[COPY1:%[0-9]+]]:_(s32) = COPY $f0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f0 = COPY [[COPY1]](s32) + ; FP64: RetRA implicit $f0 +entry: + %call = call float @float_in_gpr(i32 %a, float %b) + ret float %call +} + + +define double @call_double_in_gpr(i32 %a, double %b) { + ; FP32-LABEL: name: call_double_in_gpr + ; FP32: bb.1.entry: + ; FP32: liveins: $a0, $a2, $a3 + ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; FP32: [[BuildPairF64_:%[0-9]+]]:afgr64(s64) = BuildPairF64 $a2, $a3 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $a0 = COPY [[COPY]](s32) + ; FP32: $a3 = ExtractElementF64 [[BuildPairF64_]](s64), 1 + ; FP32: $a2 = ExtractElementF64 [[BuildPairF64_]](s64), 0 + ; FP32: JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0 + ; FP32: [[COPY1:%[0-9]+]]:_(s64) = COPY $d0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d0 = COPY [[COPY1]](s64) + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: call_double_in_gpr + ; FP64: bb.1.entry: + ; FP64: liveins: $a0, $a2, $a3 + ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; FP64: [[BuildPairF64_64_:%[0-9]+]]:fgr64(s64) = BuildPairF64_64 $a2, $a3 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $a0 = COPY [[COPY]](s32) + ; FP64: $a3 = ExtractElementF64_64 [[BuildPairF64_64_]](s64), 1 + ; FP64: $a2 = ExtractElementF64_64 [[BuildPairF64_64_]](s64), 0 + ; FP64: JAL @double_in_gpr, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0_64 + ; FP64: [[COPY1:%[0-9]+]]:_(s64) = COPY $d0_64 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d0_64 = COPY [[COPY1]](s64) + ; FP64: RetRA implicit $d0_64 +entry: + %call = call double @double_in_gpr(i32 %a, double %b) + ret double %call +} diff --git a/test/CodeGen/Mips/GlobalISel/llvm-ir/float_args.ll b/test/CodeGen/Mips/GlobalISel/llvm-ir/float_args.ll new file mode 100644 index 000000000000..e46b7e64acd2 --- /dev/null +++ b/test/CodeGen/Mips/GlobalISel/llvm-ir/float_args.ll @@ -0,0 +1,147 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=MIPS32,FP32 +; RUN: llc -O0 -mtriple=mipsel-linux-gnu -mattr=+fp64,+mips32r2 -global-isel -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=MIPS32,FP64 + +define float @float_in_fpr(float %a, float %b) { +; MIPS32-LABEL: float_in_fpr: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: mov.s $f0, $f14 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + ret float %b +} + +define double @double_in_fpr(double %a, double %b) { +; MIPS32-LABEL: double_in_fpr: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: mov.d $f0, $f14 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + ret double %b +} + +define float @float_in_gpr(i32 %a, float %b) { +; MIPS32-LABEL: float_in_gpr: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: mtc1 $5, $f0 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + ret float %b +} + +define double @double_in_gpr(i32 %a, double %b) { +; FP32-LABEL: double_in_gpr: +; FP32: # %bb.0: # %entry +; FP32-NEXT: mtc1 $6, $f0 +; FP32-NEXT: mtc1 $7, $f1 +; FP32-NEXT: jr $ra +; FP32-NEXT: nop +; +; FP64-LABEL: double_in_gpr: +; FP64: # %bb.0: # %entry +; FP64-NEXT: mtc1 $6, $f0 +; FP64-NEXT: mthc1 $7, $f0 +; FP64-NEXT: jr $ra +; FP64-NEXT: nop +entry: + ret double %b +} + +define float @call_float_in_fpr(float %a, float %b) { +; MIPS32-LABEL: call_float_in_fpr: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: addiu $sp, $sp, -24 +; MIPS32-NEXT: .cfi_def_cfa_offset 24 +; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: .cfi_offset 31, -4 +; MIPS32-NEXT: jal float_in_fpr +; MIPS32-NEXT: nop +; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: addiu $sp, $sp, 24 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %call = call float @float_in_fpr(float %a, float %b) + ret float %call +} + +define double @call_double_in_fpr(double %a, double %b) { +; MIPS32-LABEL: call_double_in_fpr: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: addiu $sp, $sp, -24 +; MIPS32-NEXT: .cfi_def_cfa_offset 24 +; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: .cfi_offset 31, -4 +; MIPS32-NEXT: jal double_in_fpr +; MIPS32-NEXT: nop +; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: addiu $sp, $sp, 24 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %call = call double @double_in_fpr(double %a, double %b) + ret double %call +} + +define float @call_float_in_gpr(i32 %a, float %b) { +; MIPS32-LABEL: call_float_in_gpr: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: addiu $sp, $sp, -24 +; MIPS32-NEXT: .cfi_def_cfa_offset 24 +; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: .cfi_offset 31, -4 +; MIPS32-NEXT: mtc1 $5, $f0 +; MIPS32-NEXT: mfc1 $5, $f0 +; MIPS32-NEXT: jal float_in_gpr +; MIPS32-NEXT: nop +; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: addiu $sp, $sp, 24 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %call = call float @float_in_gpr(i32 %a, float %b) + ret float %call +} + + +define double @call_double_in_gpr(i32 %a, double %b) { +; FP32-LABEL: call_double_in_gpr: +; FP32: # %bb.0: # %entry +; FP32-NEXT: addiu $sp, $sp, -24 +; FP32-NEXT: .cfi_def_cfa_offset 24 +; FP32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; FP32-NEXT: .cfi_offset 31, -4 +; FP32-NEXT: mtc1 $6, $f0 +; FP32-NEXT: mtc1 $7, $f1 +; FP32-NEXT: mfc1 $7, $f1 +; FP32-NEXT: mfc1 $6, $f0 +; FP32-NEXT: jal double_in_gpr +; FP32-NEXT: nop +; FP32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; FP32-NEXT: addiu $sp, $sp, 24 +; FP32-NEXT: jr $ra +; FP32-NEXT: nop +; +; FP64-LABEL: call_double_in_gpr: +; FP64: # %bb.0: # %entry +; FP64-NEXT: addiu $sp, $sp, -24 +; FP64-NEXT: .cfi_def_cfa_offset 24 +; FP64-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; FP64-NEXT: .cfi_offset 31, -4 +; FP64-NEXT: mtc1 $6, $f0 +; FP64-NEXT: mthc1 $7, $f0 +; FP64-NEXT: mfhc1 $7, $f0 +; FP64-NEXT: mfc1 $6, $f0 +; FP64-NEXT: jal double_in_gpr +; FP64-NEXT: nop +; FP64-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; FP64-NEXT: addiu $sp, $sp, 24 +; FP64-NEXT: jr $ra +; FP64-NEXT: nop +entry: + %call = call double @double_in_gpr(i32 %a, double %b) + ret double %call +} diff --git a/test/CodeGen/Mips/GlobalISel/regbankselect/float_args.mir b/test/CodeGen/Mips/GlobalISel/regbankselect/float_args.mir new file mode 100644 index 000000000000..ba4d28ca53ac --- /dev/null +++ b/test/CodeGen/Mips/GlobalISel/regbankselect/float_args.mir @@ -0,0 +1,296 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP32 +# RUN: llc -O0 -mtriple=mipsel-linux-gnu -mattr=+fp64,+mips32r2 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP64 + +--- | + + define void @float_in_fpr() {entry: ret void} + define void @double_in_fpr() {entry: ret void} + define void @float_in_gpr() {entry: ret void} + define void @double_in_gpr() {entry: ret void} + define void @call_float_in_fpr() {entry: ret void} + define void @call_double_in_fpr() {entry: ret void} + define void @call_float_in_gpr() {entry: ret void} + define void @call_double_in_gpr() {entry: ret void} + +... +--- +name: float_in_fpr +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $f12, $f14 + + ; FP32-LABEL: name: float_in_fpr + ; FP32: liveins: $f12, $f14 + ; FP32: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f14 + ; FP32: $f0 = COPY [[COPY]](s32) + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: float_in_fpr + ; FP64: liveins: $f12, $f14 + ; FP64: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f14 + ; FP64: $f0 = COPY [[COPY]](s32) + ; FP64: RetRA implicit $f0 + %1:_(s32) = COPY $f14 + $f0 = COPY %1(s32) + RetRA implicit $f0 + +... +--- +name: double_in_fpr +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d6, $d7 + + ; FP32-LABEL: name: double_in_fpr + ; FP32: liveins: $d6, $d7 + ; FP32: [[COPY:%[0-9]+]]:fprb(s64) = COPY $d7 + ; FP32: $d0 = COPY [[COPY]](s64) + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: double_in_fpr + ; FP64: liveins: $d6, $d7 + ; FP64: [[COPY:%[0-9]+]]:fprb(s64) = COPY $d7 + ; FP64: $d0 = COPY [[COPY]](s64) + ; FP64: RetRA implicit $d0 + %1:_(s64) = COPY $d7 + $d0 = COPY %1(s64) + RetRA implicit $d0 + +... +--- +name: float_in_gpr +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; FP32-LABEL: name: float_in_gpr + ; FP32: liveins: $a0, $a1 + ; FP32: [[MTC1_:%[0-9]+]]:fgr32(s32) = MTC1 $a1 + ; FP32: $f0 = COPY [[MTC1_]](s32) + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: float_in_gpr + ; FP64: liveins: $a0, $a1 + ; FP64: [[MTC1_:%[0-9]+]]:fgr32(s32) = MTC1 $a1 + ; FP64: $f0 = COPY [[MTC1_]](s32) + ; FP64: RetRA implicit $f0 + %1:fgr32(s32) = MTC1 $a1 + $f0 = COPY %1(s32) + RetRA implicit $f0 + +... +--- +name: double_in_gpr +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a2, $a3 + + ; FP32-LABEL: name: double_in_gpr + ; FP32: liveins: $a0, $a2, $a3 + ; FP32: [[BuildPairF64_:%[0-9]+]]:afgr64(s64) = BuildPairF64 $a2, $a3 + ; FP32: $d0 = COPY [[BuildPairF64_]](s64) + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: double_in_gpr + ; FP64: liveins: $a0, $a2, $a3 + ; FP64: [[BuildPairF64_:%[0-9]+]]:afgr64(s64) = BuildPairF64 $a2, $a3 + ; FP64: $d0 = COPY [[BuildPairF64_]](s64) + ; FP64: RetRA implicit $d0 + %1:afgr64(s64) = BuildPairF64 $a2, $a3 + $d0 = COPY %1(s64) + RetRA implicit $d0 + +... +--- +name: call_float_in_fpr +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $f12, $f14 + + ; FP32-LABEL: name: call_float_in_fpr + ; FP32: liveins: $f12, $f14 + ; FP32: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f12 + ; FP32: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f14 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f12 = COPY [[COPY]](s32) + ; FP32: $f14 = COPY [[COPY1]](s32) + ; FP32: JAL @float_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0 + ; FP32: [[COPY2:%[0-9]+]]:fprb(s32) = COPY $f0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f0 = COPY [[COPY2]](s32) + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: call_float_in_fpr + ; FP64: liveins: $f12, $f14 + ; FP64: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f12 + ; FP64: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f14 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f12 = COPY [[COPY]](s32) + ; FP64: $f14 = COPY [[COPY1]](s32) + ; FP64: JAL @float_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0 + ; FP64: [[COPY2:%[0-9]+]]:fprb(s32) = COPY $f0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f0 = COPY [[COPY2]](s32) + ; FP64: RetRA implicit $f0 + %0:_(s32) = COPY $f12 + %1:_(s32) = COPY $f14 + ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + $f12 = COPY %0(s32) + $f14 = COPY %1(s32) + JAL @float_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0 + %2:_(s32) = COPY $f0 + ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + $f0 = COPY %2(s32) + RetRA implicit $f0 + +... +--- +name: call_double_in_fpr +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d6, $d7 + + ; FP32-LABEL: name: call_double_in_fpr + ; FP32: liveins: $d6, $d7 + ; FP32: [[COPY:%[0-9]+]]:fprb(s64) = COPY $d6 + ; FP32: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $d7 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d6 = COPY [[COPY]](s64) + ; FP32: $d7 = COPY [[COPY1]](s64) + ; FP32: JAL @double_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit $d7, implicit-def $d0 + ; FP32: [[COPY2:%[0-9]+]]:fprb(s64) = COPY $d0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d0 = COPY [[COPY2]](s64) + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: call_double_in_fpr + ; FP64: liveins: $d6, $d7 + ; FP64: [[COPY:%[0-9]+]]:fprb(s64) = COPY $d6 + ; FP64: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $d7 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d6 = COPY [[COPY]](s64) + ; FP64: $d7 = COPY [[COPY1]](s64) + ; FP64: JAL @double_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit $d7, implicit-def $d0 + ; FP64: [[COPY2:%[0-9]+]]:fprb(s64) = COPY $d0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d0 = COPY [[COPY2]](s64) + ; FP64: RetRA implicit $d0 + %0:_(s64) = COPY $d6 + %1:_(s64) = COPY $d7 + ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + $d6 = COPY %0(s64) + $d7 = COPY %1(s64) + JAL @double_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit $d7, implicit-def $d0 + %2:_(s64) = COPY $d0 + ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + $d0 = COPY %2(s64) + RetRA implicit $d0 + +... +--- +name: call_float_in_gpr +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; FP32-LABEL: name: call_float_in_gpr + ; FP32: liveins: $a0, $a1 + ; FP32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 + ; FP32: [[MTC1_:%[0-9]+]]:fgr32(s32) = MTC1 $a1 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $a0 = COPY [[COPY]](s32) + ; FP32: $a1 = MFC1 [[MTC1_]](s32) + ; FP32: JAL @float_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $f0 + ; FP32: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f0 = COPY [[COPY1]](s32) + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: call_float_in_gpr + ; FP64: liveins: $a0, $a1 + ; FP64: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 + ; FP64: [[MTC1_:%[0-9]+]]:fgr32(s32) = MTC1 $a1 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $a0 = COPY [[COPY]](s32) + ; FP64: $a1 = MFC1 [[MTC1_]](s32) + ; FP64: JAL @float_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $f0 + ; FP64: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f0 = COPY [[COPY1]](s32) + ; FP64: RetRA implicit $f0 + %0:_(s32) = COPY $a0 + %1:fgr32(s32) = MTC1 $a1 + ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + $a0 = COPY %0(s32) + $a1 = MFC1 %1(s32) + JAL @float_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $f0 + %2:_(s32) = COPY $f0 + ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + $f0 = COPY %2(s32) + RetRA implicit $f0 + +... +--- +name: call_double_in_gpr +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a2, $a3 + + ; FP32-LABEL: name: call_double_in_gpr + ; FP32: liveins: $a0, $a2, $a3 + ; FP32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 + ; FP32: [[BuildPairF64_:%[0-9]+]]:afgr64(s64) = BuildPairF64 $a2, $a3 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $a0 = COPY [[COPY]](s32) + ; FP32: $a3 = ExtractElementF64 [[BuildPairF64_]](s64), 1 + ; FP32: $a2 = ExtractElementF64 [[BuildPairF64_]](s64), 0 + ; FP32: JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0 + ; FP32: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $d0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d0 = COPY [[COPY1]](s64) + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: call_double_in_gpr + ; FP64: liveins: $a0, $a2, $a3 + ; FP64: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 + ; FP64: [[BuildPairF64_:%[0-9]+]]:afgr64(s64) = BuildPairF64 $a2, $a3 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $a0 = COPY [[COPY]](s32) + ; FP64: $a3 = ExtractElementF64 [[BuildPairF64_]](s64), 1 + ; FP64: $a2 = ExtractElementF64 [[BuildPairF64_]](s64), 0 + ; FP64: JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0 + ; FP64: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $d0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d0 = COPY [[COPY1]](s64) + ; FP64: RetRA implicit $d0 + %0:_(s32) = COPY $a0 + %1:afgr64(s64) = BuildPairF64 $a2, $a3 + ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + $a0 = COPY %0(s32) + $a3 = ExtractElementF64 %1(s64), 1 + $a2 = ExtractElementF64 %1(s64), 0 + JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0 + %2:_(s64) = COPY $d0 + ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + $d0 = COPY %2(s64) + RetRA implicit $d0 + +... + diff --git a/test/CodeGen/X86/regalloc-copy-hints.mir b/test/CodeGen/X86/regalloc-copy-hints.mir index 6287066e64fe..3a5cd41b852f 100644 --- a/test/CodeGen/X86/regalloc-copy-hints.mir +++ b/test/CodeGen/X86/regalloc-copy-hints.mir @@ -3,310 +3,12 @@ # REQUIRES: asserts --- | - %0 = type { %1 } - %1 = type { %2, %23, %23*, %27*, %28*, %29, %33*, %34, %42, i8, i32, i32, i32 } - %2 = type { %3, %6, %14, %14, i8, i8*, i8*, %16 } - %3 = type { i32 (...)**, %4*, %5* } - %4 = type { i32 (...)**, %3* } - %5 = type { i32 (...)** } - %6 = type { %7 } - %7 = type { %8, i32, %12 } - %8 = type { %9**, %9**, %9**, %10 } - %9 = type { i32, i32, i32, i8* } - %10 = type { %11 } - %11 = type { %9** } - %12 = type { %13 } - %13 = type { i32 } - %14 = type { i32, %15* } - %15 = type { i32, i32, i8* } - %16 = type { %17 } - %17 = type { %18*, %20, %22 } - %18 = type { %19* } - %19 = type <{ %18, %19*, %18*, i8, [3 x i8] }> - %20 = type { %21 } - %21 = type { %18 } - %22 = type { %13 } - %23 = type { %24 } - %24 = type { %18*, %25, %26 } - %25 = type { %21 } - %26 = type { %13 } - %27 = type { i32 (...)** } - %28 = type { i32 (...)** } - %29 = type { %30 } - %30 = type { %18*, %31, %32 } - %31 = type { %21 } - %32 = type { %13 } - %33 = type { i32 (...)** } - %34 = type { %35 } - %35 = type { %36 } - %36 = type { %37, i32, %41 } - %37 = type { %38**, %38**, %38**, %39 } - %38 = type { %42, i32 } - %39 = type { %40 } - %40 = type { %38** } - %41 = type { %13 } - %42 = type { %43 } - %43 = type { %18*, %44, %45 } - %44 = type { %21 } - %45 = type { %13 } - %46 = type { %47, %48 } - %47 = type <{ %18, %19*, %18*, i8 }> - %48 = type { %49 } - %49 = type { i32, %50 } - %50 = type { { i32, i32 }, { i32, i32 }, { i32, i32 }, { i32, i32 }, { i32, i32 }, { i32, i32 } } - - define void @fun(%0* %arg) local_unnamed_addr #0 align 2 personality i32 (...)* @__gxx_personality_v0 { - bb: - %tmp = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 1 - %tmp1 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0 - br i1 undef, label %bb5, label %bb6 - - bb5: ; preds = %bb - unreachable - - bb6: ; preds = %bb - %tmp8 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 8, i32 0, i32 1, i32 0, i32 0 - br i1 undef, label %bb10, label %bb9 - - bb9: ; preds = %bb6 - unreachable - - bb10: ; preds = %bb6 - store %18* %tmp8, %18** undef - br i1 undef, label %bb14, label %bb13 - - bb13: ; preds = %bb10 - unreachable - - bb14: ; preds = %bb10 - br i1 undef, label %bb17, label %bb18 - - bb17: ; preds = %bb14 - unreachable - - bb18: ; preds = %bb14 - br i1 undef, label %bb20, label %bb19 - - bb19: ; preds = %bb18 - unreachable - - bb20: ; preds = %bb18 - br i1 undef, label %bb25, label %bb24 - - bb24: ; preds = %bb20 - unreachable - - bb25: ; preds = %bb20 - br i1 undef, label %bb29, label %bb30 - - bb29: ; preds = %bb25 - unreachable - - bb30: ; preds = %bb25 - br i1 undef, label %bb38, label %bb31 - - bb31: ; preds = %bb30 - %tmp32 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0 - br i1 undef, label %bb34, label %bb35 - - bb34: ; preds = %bb31 - unreachable - - bb35: ; preds = %bb31 - br i1 undef, label %bb40, label %bb36 - - bb36: ; preds = %bb35 - unreachable - - bb38: ; preds = %bb30 - %tmp391 = bitcast %18* %tmp1 to %19** - br label %bb40 - - bb40: ; preds = %bb35, %bb38 - %tmp41 = phi %18* [ %tmp1, %bb38 ], [ null, %bb35 ] - %tmp42 = phi %19** [ %tmp391, %bb38 ], [ %tmp32, %bb35 ] - br i1 undef, label %bb43, label %bb48 - - bb43: ; preds = %bb40 - %tmp44 = tail call i8* @_Znwj() - store %18* %tmp41, %18** undef - %tmp46 = bitcast %19** %tmp42 to i8** - store i8* %tmp44, i8** %tmp46 - %0 = bitcast i8* %tmp44 to %46* - tail call void @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_() - br label %bb48 - - bb48: ; preds = %bb43, %bb40 - %tmp49 = phi %46* [ %0, %bb43 ], [ undef, %bb40 ] - %tmp50 = getelementptr inbounds %46, %46* %tmp49, i32 0, i32 1, i32 0, i32 1, i32 4, i32 0 - store i32 ptrtoint (i1 (%0*)* @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private15_preEnd__authorEv to i32), i32* %tmp50 - br i1 undef, label %bb52, label %bb53 - - bb52: ; preds = %bb48 - unreachable - - bb53: ; preds = %bb48 - br i1 undef, label %bb55, label %bb54 - - bb54: ; preds = %bb53 - unreachable - - bb55: ; preds = %bb53 - br i1 undef, label %bb59, label %bb58 - - bb58: ; preds = %bb55 - unreachable - - bb59: ; preds = %bb55 - br i1 undef, label %bb62, label %bb61 - - bb61: ; preds = %bb59 - unreachable - - bb62: ; preds = %bb59 - br i1 undef, label %bb64, label %bb65 - - bb64: ; preds = %bb62 - unreachable - - bb65: ; preds = %bb62 - %tmp66 = icmp eq %46* null, null - br i1 %tmp66, label %bb72, label %bb67 - - bb67: ; preds = %bb65 - %tmp68 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0 - br i1 undef, label %bb70, label %bb74 - - bb70: ; preds = %bb67 - unreachable - - bb72: ; preds = %bb65 - %tmp732 = bitcast %18* %tmp1 to %19** - br label %bb74 - - bb74: ; preds = %bb67, %bb72 - %tmp75 = phi %18* [ %tmp1, %bb72 ], [ null, %bb67 ] - %tmp76 = phi %19** [ %tmp732, %bb72 ], [ %tmp68, %bb67 ] - %tmp77 = tail call i8* @_Znwj() - store %18* %tmp75, %18** undef - %tmp79 = bitcast %19** %tmp76 to i8** - store i8* %tmp77, i8** %tmp79 - %1 = bitcast i8* %tmp77 to %46* - tail call void @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_() - %tmp81 = getelementptr inbounds %46, %46* %1, i32 0, i32 1, i32 0, i32 1, i32 2, i32 0 - store i32 ptrtoint (i1 (%0*)* @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private14_end__commentsEv to i32), i32* %tmp81 - store %18* %tmp8, %18** undef - %2 = bitcast %0* %arg to i8* - %sunkaddr = getelementptr i8, i8* %2, i32 140 - %3 = bitcast i8* %sunkaddr to %18** - %tmp85 = load %18*, %18** %3 - %tmp864 = bitcast %18* %tmp85 to %19** - %tmp87 = load %19*, %19** %tmp864 - %tmp88 = icmp eq %19* %tmp87, null - br i1 %tmp88, label %bb90, label %bb89 - - bb89: ; preds = %bb74 - unreachable - - bb90: ; preds = %bb74 - br i1 undef, label %bb94, label %bb92 - - bb92: ; preds = %bb90 - br i1 undef, label %bb96, label %bb97 - - bb94: ; preds = %bb90 - unreachable - - bb96: ; preds = %bb92 - unreachable - - bb97: ; preds = %bb92 - br i1 undef, label %bb101, label %bb102 - - bb101: ; preds = %bb97 - unreachable - - bb102: ; preds = %bb97 - br i1 undef, label %bb104, label %bb103 - - bb103: ; preds = %bb102 - unreachable - - bb104: ; preds = %bb102 - br i1 undef, label %bb109, label %bb108 - - bb108: ; preds = %bb104 - unreachable - - bb109: ; preds = %bb104 - br i1 undef, label %bb111, label %bb112 - - bb111: ; preds = %bb109 - unreachable - - bb112: ; preds = %bb109 - br i1 undef, label %bb118, label %bb117 - - bb117: ; preds = %bb112 - unreachable - - bb118: ; preds = %bb112 - br i1 undef, label %bb120, label %bb121 - - bb120: ; preds = %bb118 - unreachable - - bb121: ; preds = %bb118 - br i1 undef, label %bb124, label %bb125 - - bb124: ; preds = %bb121 - unreachable - - bb125: ; preds = %bb121 - %4 = bitcast %18* %tmp1 to %46** - %tmp126 = load %46*, %46** %4 - %tmp127 = icmp eq %46* %tmp126, null - br i1 %tmp127, label %bb135, label %bb128 - - bb128: ; preds = %bb125 - br label %bb129 - - bb129: ; preds = %bb131, %bb128 - %tmp130 = icmp ugt i32 undef, 95406324 - br i1 %tmp130, label %bb131, label %bb133 - - bb131: ; preds = %bb129 - br label %bb129 - - bb133: ; preds = %bb129 - unreachable - - bb135: ; preds = %bb125 - br i1 undef, label %bb137, label %bb138 - - bb137: ; preds = %bb135 - unreachable - - bb138: ; preds = %bb135 - unreachable - } - - declare zeroext i1 @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private15_preEnd__authorEv(%0*) #0 - - declare zeroext i1 @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private14_end__commentsEv(%0*) #0 align 2 - - declare i32 @__gxx_personality_v0(...) #0 - - declare noalias nonnull i8* @_Znwj() local_unnamed_addr #0 - - declare void @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_() local_unnamed_addr #0 - - ; Function Attrs: nounwind - declare void @llvm.stackprotector(i8*, i8**) #1 - - attributes #0 = { "target-cpu"="i486" } - attributes #1 = { nounwind } + define void @fun() { ret void } + declare noalias nonnull i8* @_Znwj() + declare void @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_() + declare zeroext i1 @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private14_end__commentsEv() + declare zeroext i1 @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private15_preEnd__authorEv() ... --- # A physreg should always only be hinted once per getRegAllocationHints() query. @@ -405,7 +107,7 @@ frameInfo: fixedStack: - { id: 0, size: 4, alignment: 4, stack-id: 0, isImmutable: true } body: | - bb.0.bb: + bb.0: successors: %bb.1(0x00000001), %bb.2(0x7fffffff) %13:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -413,11 +115,11 @@ body: | JNE_1 %bb.2, implicit killed $eflags JMP_1 %bb.1 - bb.1.bb5: + bb.1: successors: - bb.2.bb6: + bb.2: successors: %bb.4(0x7fffffff), %bb.3(0x00000001) %15:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -425,26 +127,26 @@ body: | JNE_1 %bb.4, implicit killed $eflags JMP_1 %bb.3 - bb.3.bb9: + bb.3: successors: - bb.4.bb10: + bb.4: successors: %bb.6(0x7fffffff), %bb.5(0x00000001) - %12:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0) + %12:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg %1:gr32 = LEA32r %12, 1, $noreg, 144, $noreg - MOV32mr undef %17:gr32, 1, $noreg, 0, $noreg, %1 :: (store 4 into `%18** undef`) + MOV32mr undef %17:gr32, 1, $noreg, 0, $noreg, %1 %18:gr32_abcd = MOV32r0 implicit-def dead $eflags TEST8rr %18.sub_8bit, %18.sub_8bit, implicit-def $eflags JNE_1 %bb.6, implicit killed $eflags JMP_1 %bb.5 - bb.5.bb13: + bb.5: successors: - bb.6.bb14: + bb.6: successors: %bb.7(0x00000001), %bb.8(0x7fffffff) %20:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -452,11 +154,11 @@ body: | JNE_1 %bb.8, implicit killed $eflags JMP_1 %bb.7 - bb.7.bb17: + bb.7: successors: - bb.8.bb18: + bb.8: successors: %bb.10(0x7fffffff), %bb.9(0x00000001) %22:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -464,11 +166,11 @@ body: | JNE_1 %bb.10, implicit killed $eflags JMP_1 %bb.9 - bb.9.bb19: + bb.9: successors: - bb.10.bb20: + bb.10: successors: %bb.12(0x7fffffff), %bb.11(0x00000001) %24:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -476,11 +178,11 @@ body: | JNE_1 %bb.12, implicit killed $eflags JMP_1 %bb.11 - bb.11.bb24: + bb.11: successors: - bb.12.bb25: + bb.12: successors: %bb.13(0x00000001), %bb.14(0x7fffffff) %26:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -488,18 +190,18 @@ body: | JNE_1 %bb.14, implicit killed $eflags JMP_1 %bb.13 - bb.13.bb29: + bb.13: successors: - bb.14.bb30: + bb.14: %0:gr32 = LEA32r %12, 1, $noreg, 80, $noreg %28:gr32_abcd = MOV32r0 implicit-def dead $eflags TEST8rr %28.sub_8bit, %28.sub_8bit, implicit-def $eflags JNE_1 %bb.20, implicit killed $eflags JMP_1 %bb.15 - bb.15.bb31: + bb.15: successors: %bb.16(0x00000001), %bb.17(0x7fffffff) %78:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -507,11 +209,11 @@ body: | JNE_1 %bb.17, implicit killed $eflags JMP_1 %bb.16 - bb.16.bb34: + bb.16: successors: - bb.17.bb35: + bb.17: successors: %bb.18(0x7fffffff), %bb.19(0x00000001) TEST8rr %78.sub_8bit, %78.sub_8bit, implicit-def $eflags @@ -521,15 +223,15 @@ body: | %79:gr32 = LEA32r %12, 1, $noreg, 80, $noreg JMP_1 %bb.21 - bb.19.bb36: + bb.19: successors: - bb.20.bb38: + bb.20: %78:gr32_abcd = COPY %0 %79:gr32 = COPY %0 - bb.21.bb40: + bb.21: successors: %bb.22, %bb.23 %35:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -538,31 +240,31 @@ body: | JNE_1 %bb.23, implicit killed $eflags JMP_1 %bb.22 - bb.22.bb43: + bb.22: ADJCALLSTACKDOWN32 0, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp CALLpcrel32 @_Znwj, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp, implicit-def $eax ADJCALLSTACKUP32 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp %80:gr32 = COPY killed $eax - MOV32mr undef %38:gr32, 1, $noreg, 0, $noreg, %78 :: (store 4 into `%18** undef`) - MOV32mr %79, 1, $noreg, 0, $noreg, %80 :: (store 4 into %ir.tmp46) + MOV32mr undef %38:gr32, 1, $noreg, 0, $noreg, %78 + MOV32mr %79, 1, $noreg, 0, $noreg, %80 ADJCALLSTACKDOWN32 0, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp CALLpcrel32 @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp ADJCALLSTACKUP32 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp - bb.23.bb48: + bb.23: successors: %bb.24(0x00000001), %bb.25(0x7fffffff) - MOV32mi %80, 1, $noreg, 52, $noreg, @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private15_preEnd__authorEv :: (store 4 into %ir.tmp50) + MOV32mi %80, 1, $noreg, 52, $noreg, @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private15_preEnd__authorEv %39:gr32_abcd = MOV32r0 implicit-def dead $eflags TEST8rr %39.sub_8bit, %39.sub_8bit, implicit-def $eflags JNE_1 %bb.25, implicit killed $eflags JMP_1 %bb.24 - bb.24.bb52: + bb.24: successors: - bb.25.bb53: + bb.25: successors: %bb.27(0x7fffffff), %bb.26(0x00000001) %41:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -570,11 +272,11 @@ body: | JNE_1 %bb.27, implicit killed $eflags JMP_1 %bb.26 - bb.26.bb54: + bb.26: successors: - bb.27.bb55: + bb.27: successors: %bb.29(0x7fffffff), %bb.28(0x00000001) %43:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -582,11 +284,11 @@ body: | JNE_1 %bb.29, implicit killed $eflags JMP_1 %bb.28 - bb.28.bb58: + bb.28: successors: - bb.29.bb59: + bb.29: successors: %bb.31(0x7fffffff), %bb.30(0x00000001) %45:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -594,11 +296,11 @@ body: | JNE_1 %bb.31, implicit killed $eflags JMP_1 %bb.30 - bb.30.bb61: + bb.30: successors: - bb.31.bb62: + bb.31: successors: %bb.32(0x00000001), %bb.33(0x7fffffff) %47:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -606,11 +308,11 @@ body: | JNE_1 %bb.33, implicit killed $eflags JMP_1 %bb.32 - bb.32.bb64: + bb.32: successors: - bb.33.bb65: + bb.33: successors: %bb.37(0x30000000), %bb.34(0x50000000) %49:gr8 = MOV8ri 1 @@ -618,7 +320,7 @@ body: | JNE_1 %bb.37, implicit killed $eflags JMP_1 %bb.34 - bb.34.bb67: + bb.34: successors: %bb.36(0x00000001), %bb.35(0x7fffffff) %81:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -629,38 +331,38 @@ body: | %82:gr32 = LEA32r %12, 1, $noreg, 80, $noreg JMP_1 %bb.38 - bb.36.bb70: + bb.36: successors: - bb.37.bb72: + bb.37: %81:gr32_abcd = COPY %0 %82:gr32 = COPY %0 - bb.38.bb74: + bb.38: successors: %bb.40(0x7fffffff), %bb.39(0x00000001) ADJCALLSTACKDOWN32 0, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp CALLpcrel32 @_Znwj, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp, implicit-def $eax ADJCALLSTACKUP32 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp %52:gr32 = COPY killed $eax - MOV32mr undef %53:gr32, 1, $noreg, 0, $noreg, %81 :: (store 4 into `%18** undef`) - MOV32mr %82, 1, $noreg, 0, $noreg, %52 :: (store 4 into %ir.tmp79) + MOV32mr undef %53:gr32, 1, $noreg, 0, $noreg, %81 + MOV32mr %82, 1, $noreg, 0, $noreg, %52 ADJCALLSTACKDOWN32 0, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp CALLpcrel32 @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp ADJCALLSTACKUP32 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp - MOV32mi %52, 1, $noreg, 36, $noreg, @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private14_end__commentsEv :: (store 4 into %ir.tmp81) - MOV32mr undef %54:gr32, 1, $noreg, 0, $noreg, %1 :: (store 4 into `%18** undef`) - %55:gr32 = MOV32rm %12, 1, $noreg, 140, $noreg :: (load 4 from %ir.3) - CMP32mi8 %55, 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load 4 from %ir.tmp864) + MOV32mi %52, 1, $noreg, 36, $noreg, @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private14_end__commentsEv + MOV32mr undef %54:gr32, 1, $noreg, 0, $noreg, %1 + %55:gr32 = MOV32rm %12, 1, $noreg, 140, $noreg + CMP32mi8 %55, 1, $noreg, 0, $noreg, 0, implicit-def $eflags JE_1 %bb.40, implicit killed $eflags JMP_1 %bb.39 - bb.39.bb89: + bb.39: successors: - bb.40.bb90: + bb.40: successors: %bb.42(0x00000001), %bb.41(0x7fffffff) %56:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -668,7 +370,7 @@ body: | JNE_1 %bb.42, implicit killed $eflags JMP_1 %bb.41 - bb.41.bb92: + bb.41: successors: %bb.43(0x00000001), %bb.44(0x7fffffff) %58:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -676,15 +378,15 @@ body: | JNE_1 %bb.43, implicit killed $eflags JMP_1 %bb.44 - bb.42.bb94: + bb.42: successors: - bb.43.bb96: + bb.43: successors: - bb.44.bb97: + bb.44: successors: %bb.45(0x00000001), %bb.46(0x7fffffff) %60:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -692,11 +394,11 @@ body: | JNE_1 %bb.46, implicit killed $eflags JMP_1 %bb.45 - bb.45.bb101: + bb.45: successors: - bb.46.bb102: + bb.46: successors: %bb.48(0x7fffffff), %bb.47(0x00000001) %62:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -704,11 +406,11 @@ body: | JNE_1 %bb.48, implicit killed $eflags JMP_1 %bb.47 - bb.47.bb103: + bb.47: successors: - bb.48.bb104: + bb.48: successors: %bb.50(0x7fffffff), %bb.49(0x00000001) %64:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -716,11 +418,11 @@ body: | JNE_1 %bb.50, implicit killed $eflags JMP_1 %bb.49 - bb.49.bb108: + bb.49: successors: - bb.50.bb109: + bb.50: successors: %bb.51(0x00000001), %bb.52(0x7fffffff) %66:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -728,11 +430,11 @@ body: | JNE_1 %bb.52, implicit killed $eflags JMP_1 %bb.51 - bb.51.bb111: + bb.51: successors: - bb.52.bb112: + bb.52: successors: %bb.54(0x7fffffff), %bb.53(0x00000001) %68:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -740,11 +442,11 @@ body: | JNE_1 %bb.54, implicit killed $eflags JMP_1 %bb.53 - bb.53.bb117: + bb.53: successors: - bb.54.bb118: + bb.54: successors: %bb.55(0x00000001), %bb.56(0x7fffffff) %70:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -752,11 +454,11 @@ body: | JNE_1 %bb.56, implicit killed $eflags JMP_1 %bb.55 - bb.55.bb120: + bb.55: successors: - bb.56.bb121: + bb.56: successors: %bb.57(0x00000001), %bb.58(0x7fffffff) %72:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -764,31 +466,31 @@ body: | JNE_1 %bb.58, implicit killed $eflags JMP_1 %bb.57 - bb.57.bb124: + bb.57: successors: - bb.58.bb125: + bb.58: successors: %bb.62(0x00000001), %bb.59(0x7fffffff) - CMP32mi8 %0, 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load 4 from %ir.4) + CMP32mi8 %0, 1, $noreg, 0, $noreg, 0, implicit-def $eflags JE_1 %bb.62, implicit killed $eflags JMP_1 %bb.59 - bb.59.bb128: + bb.59: - bb.60.bb129: + bb.60: successors: %bb.60(0x7fffffff), %bb.61(0x00000001) CMP32ri undef %75:gr32, 95406325, implicit-def $eflags JB_1 %bb.61, implicit killed $eflags JMP_1 %bb.60 - bb.61.bb133: + bb.61: successors: - bb.62.bb135: + bb.62: successors: %bb.63, %bb.64 %76:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -796,10 +498,10 @@ body: | JNE_1 %bb.64, implicit killed $eflags JMP_1 %bb.63 - bb.63.bb137: + bb.63: successors: - bb.64.bb138: + bb.64: ... diff --git a/test/CodeGen/X86/vector-zext.ll b/test/CodeGen/X86/vector-zext.ll index 4e436f61e833..c21fc6a73a88 100644 --- a/test/CodeGen/X86/vector-zext.ll +++ b/test/CodeGen/X86/vector-zext.ll @@ -2563,3 +2563,160 @@ entry: %e = zext <8 x i6> %d to <8 x i64> ret <8 x i64> %e } + +define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) { +; SSE2-LABEL: splatshuf_zext_v4i64: +; SSE2: # %bb.0: +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: splatshuf_zext_v4i64: +; SSSE3: # %bb.0: +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: splatshuf_zext_v4i64: +; SSE41: # %bb.0: +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero +; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSE41-NEXT: retq +; +; AVX1-LABEL: splatshuf_zext_v4i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatshuf_zext_v4i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0 +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX2-NEXT: retq +; +; AVX512-LABEL: splatshuf_zext_v4i64: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0 +; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX512-NEXT: retq + %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer + %ext = zext <4 x i32> %shuf to <4 x i64> + ret <4 x i64> %ext +} + +define <8 x i32> @splatshuf_zext_v8i32(<8 x i16> %x) { +; SSE2-LABEL: splatshuf_zext_v8i32: +; SSE2: # %bb.0: +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,5,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,0] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,6,5,5,4] +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: splatshuf_zext_v8i32: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15] +; SSSE3-NEXT: pxor %xmm2, %xmm2 +; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: splatshuf_zext_v8i32: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15] +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE41-NEXT: retq +; +; AVX1-LABEL: splatshuf_zext_v8i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15] +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatshuf_zext_v8i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15] +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX2-NEXT: retq +; +; AVX512-LABEL: splatshuf_zext_v8i32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15] +; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512-NEXT: retq + %shuf = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> + %ext = zext <8 x i16> %shuf to <8 x i32> + ret <8 x i32> %ext +} + +define <16 x i16> @splatshuf_zext_v16i16(<16 x i8> %x) { +; SSE2-LABEL: splatshuf_zext_v16i16: +; SSE2: # %bb.0: +; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,3] +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: splatshuf_zext_v16i16: +; SSSE3: # %bb.0: +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: splatshuf_zext_v16i16: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] +; SSE41-NEXT: retq +; +; AVX1-LABEL: splatshuf_zext_v16i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] +; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatshuf_zext_v16i16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] +; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX2-NEXT: retq +; +; AVX512-LABEL: splatshuf_zext_v16i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] +; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX512-NEXT: retq + %shuf = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> + %ext = zext <16 x i8> %shuf to <16 x i16> + ret <16 x i16> %ext +} diff --git a/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/test/Transforms/LoopVectorize/X86/metadata-enable.ll index ac535096466c..709e69fbb1da 100644 --- a/test/Transforms/LoopVectorize/X86/metadata-enable.ll +++ b/test/Transforms/LoopVectorize/X86/metadata-enable.ll @@ -2246,84 +2246,84 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly ; O3DEFAULT-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; O3DEFAULT-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 ; O3DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer -; O3DEFAULT-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] +; O3DEFAULT-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP1]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP5:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 ; O3DEFAULT-NEXT: [[ARRAYIDX2_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 ; O3DEFAULT-NEXT: [[TMP6:%.*]] = bitcast i32* [[ARRAYIDX_4]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 -; O3DEFAULT-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP7]] +; O3DEFAULT-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX2_4]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP9]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 ; O3DEFAULT-NEXT: [[ARRAYIDX2_8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 ; O3DEFAULT-NEXT: [[TMP10:%.*]] = bitcast i32* [[ARRAYIDX_8]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4 -; O3DEFAULT-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP11]] +; O3DEFAULT-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP11]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP13:%.*]] = bitcast i32* [[ARRAYIDX2_8]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* [[TMP13]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 ; O3DEFAULT-NEXT: [[ARRAYIDX2_12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 ; O3DEFAULT-NEXT: [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX_12]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 -; O3DEFAULT-NEXT: [[TMP16:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP15]] +; O3DEFAULT-NEXT: [[TMP16:%.*]] = add nsw <4 x i32> [[TMP15]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP17:%.*]] = bitcast i32* [[ARRAYIDX2_12]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP16]], <4 x i32>* [[TMP17]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 ; O3DEFAULT-NEXT: [[ARRAYIDX2_16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 ; O3DEFAULT-NEXT: [[TMP18:%.*]] = bitcast i32* [[ARRAYIDX_16]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP18]], align 4 -; O3DEFAULT-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP19]] +; O3DEFAULT-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[TMP19]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP21:%.*]] = bitcast i32* [[ARRAYIDX2_16]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP21]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 ; O3DEFAULT-NEXT: [[ARRAYIDX2_20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 ; O3DEFAULT-NEXT: [[TMP22:%.*]] = bitcast i32* [[ARRAYIDX_20]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP23:%.*]] = load <4 x i32>, <4 x i32>* [[TMP22]], align 4 -; O3DEFAULT-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP23]] +; O3DEFAULT-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[TMP23]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP25:%.*]] = bitcast i32* [[ARRAYIDX2_20]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP24]], <4 x i32>* [[TMP25]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 ; O3DEFAULT-NEXT: [[ARRAYIDX2_24:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 ; O3DEFAULT-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARRAYIDX_24]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP27:%.*]] = load <4 x i32>, <4 x i32>* [[TMP26]], align 4 -; O3DEFAULT-NEXT: [[TMP28:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP27]] +; O3DEFAULT-NEXT: [[TMP28:%.*]] = add nsw <4 x i32> [[TMP27]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP29:%.*]] = bitcast i32* [[ARRAYIDX2_24]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP28]], <4 x i32>* [[TMP29]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 ; O3DEFAULT-NEXT: [[ARRAYIDX2_28:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 ; O3DEFAULT-NEXT: [[TMP30:%.*]] = bitcast i32* [[ARRAYIDX_28]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP31:%.*]] = load <4 x i32>, <4 x i32>* [[TMP30]], align 4 -; O3DEFAULT-NEXT: [[TMP32:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP31]] +; O3DEFAULT-NEXT: [[TMP32:%.*]] = add nsw <4 x i32> [[TMP31]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP33:%.*]] = bitcast i32* [[ARRAYIDX2_28]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP32]], <4 x i32>* [[TMP33]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 ; O3DEFAULT-NEXT: [[ARRAYIDX2_32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 ; O3DEFAULT-NEXT: [[TMP34:%.*]] = bitcast i32* [[ARRAYIDX_32]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP35:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 -; O3DEFAULT-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP35]] +; O3DEFAULT-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[TMP35]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP37:%.*]] = bitcast i32* [[ARRAYIDX2_32]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP36]], <4 x i32>* [[TMP37]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_36:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 ; O3DEFAULT-NEXT: [[ARRAYIDX2_36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 ; O3DEFAULT-NEXT: [[TMP38:%.*]] = bitcast i32* [[ARRAYIDX_36]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP39:%.*]] = load <4 x i32>, <4 x i32>* [[TMP38]], align 4 -; O3DEFAULT-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP39]] +; O3DEFAULT-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP41:%.*]] = bitcast i32* [[ARRAYIDX2_36]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP41]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 ; O3DEFAULT-NEXT: [[ARRAYIDX2_40:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 ; O3DEFAULT-NEXT: [[TMP42:%.*]] = bitcast i32* [[ARRAYIDX_40]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP43:%.*]] = load <4 x i32>, <4 x i32>* [[TMP42]], align 4 -; O3DEFAULT-NEXT: [[TMP44:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP43]] +; O3DEFAULT-NEXT: [[TMP44:%.*]] = add nsw <4 x i32> [[TMP43]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP45:%.*]] = bitcast i32* [[ARRAYIDX2_40]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP44]], <4 x i32>* [[TMP45]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_44:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 ; O3DEFAULT-NEXT: [[ARRAYIDX2_44:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 ; O3DEFAULT-NEXT: [[TMP46:%.*]] = bitcast i32* [[ARRAYIDX_44]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP47:%.*]] = load <4 x i32>, <4 x i32>* [[TMP46]], align 4 -; O3DEFAULT-NEXT: [[TMP48:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP47]] +; O3DEFAULT-NEXT: [[TMP48:%.*]] = add nsw <4 x i32> [[TMP47]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP49:%.*]] = bitcast i32* [[ARRAYIDX2_44]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP48]], <4 x i32>* [[TMP49]], align 4 ; O3DEFAULT-NEXT: [[TMP50:%.*]] = load i32, i32* [[A]], align 4 diff --git a/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll b/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll index fb3d12d88ba5..8e36a921c758 100644 --- a/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll +++ b/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll @@ -66,7 +66,7 @@ define void @test2(<4 x i16> %a, <4 x i16> %b, i64 %c0, i64 %c1, i64 %c2, i64 %c ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[C1:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[C2:%.*]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[C3:%.*]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP4]], [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP0]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[LOAD0:%.*]] = load i64, i64* [[GEP0]] diff --git a/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll index 401776aa270d..14a6d0eb72c8 100644 --- a/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll +++ b/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll @@ -21,7 +21,7 @@ define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], ; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP5]], ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP4]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP9]]) ; CHECK-NEXT: ret i32 [[TMP10]] diff --git a/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll index db02f55dcc77..d3bbf3df8582 100644 --- a/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll +++ b/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll @@ -65,7 +65,7 @@ define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 % ; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[TMP5]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> undef, i32 [[T4]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP2]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[G:%.*]], i64 [[TMP10]] @@ -86,10 +86,10 @@ define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 % ; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP17]] ; CHECK-NEXT: [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> , i32 [[ADD11]], i32 1 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[T12]], i32 1 -; CHECK-NEXT: [[TMP21]] = add nsw <2 x i32> [[TMP18]], [[TMP20]] +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[ADD11]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> , i32 [[T12]], i32 1 +; CHECK-NEXT: [[TMP21]] = add nsw <2 x i32> [[TMP19]], [[TMP20]] ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP22]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] @@ -184,7 +184,7 @@ define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 % ; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]] ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> undef, i32 [[T4]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[TMP1]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[TMP9]], [[TMP1]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP12]] @@ -194,10 +194,10 @@ define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 % ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP14]] ; CHECK-NEXT: [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> , i32 [[ADD11]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> undef, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP16]], i32 [[T12]], i32 1 -; CHECK-NEXT: [[TMP18]] = add nsw <2 x i32> [[TMP15]], [[TMP17]] +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> undef, i32 [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[ADD11]], i32 1 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> , i32 [[T12]], i32 1 +; CHECK-NEXT: [[TMP18]] = add nsw <2 x i32> [[TMP16]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP18]], i32 0 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP19]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] diff --git a/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll b/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll index 7038b0f8e276..6c474705abcf 100644 --- a/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll +++ b/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll @@ -16,8 +16,8 @@ define void @fusion(i8* noalias nocapture align 256 dereferenceable(19267584) %a ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds half, half* [[TMP10]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[TMP11]] to <2 x half>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x half>, <2 x half>* [[TMP1]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x half> , [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x half> , [[TMP3]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x half> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x half> [[TMP3]], ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds half, half* [[TMP15]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast half* [[TMP16]] to <2 x half>* ; CHECK-NEXT: store <2 x half> [[TMP4]], <2 x half>* [[TMP5]], align 8 diff --git a/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/test/Transforms/SLPVectorizer/X86/PR35628_2.ll index e8a83fa4b7a6..712ff040a918 100644 --- a/test/Transforms/SLPVectorizer/X86/PR35628_2.ll +++ b/test/Transforms/SLPVectorizer/X86/PR35628_2.ll @@ -14,7 +14,7 @@ define void @test() #0 { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP0]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> , [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP4]], ; CHECK-NEXT: [[TMP6]] = extractelement <4 x i64> [[TMP5]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 ; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32 diff --git a/test/Transforms/SLPVectorizer/X86/PR35777.ll b/test/Transforms/SLPVectorizer/X86/PR35777.ll index adfe77f89f52..4a403e7b9a3f 100644 --- a/test/Transforms/SLPVectorizer/X86/PR35777.ll +++ b/test/Transforms/SLPVectorizer/X86/PR35777.ll @@ -10,7 +10,7 @@ define { i64, i64 } @patatino(double %arg) { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[ARG:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[ARG]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP0]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16 ; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]] diff --git a/test/Transforms/SLPVectorizer/X86/PR39774.ll b/test/Transforms/SLPVectorizer/X86/PR39774.ll index 67717a54659c..ae4a6b88bd31 100644 --- a/test/Transforms/SLPVectorizer/X86/PR39774.ll +++ b/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -10,7 +10,7 @@ define void @Test(i32) { ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP15:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef ; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] ; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] @@ -100,7 +100,7 @@ define void @Test(i32) { ; FORCE_REDUCTION-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP13:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> ; FORCE_REDUCTION-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1 -; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> , [[SHUFFLE]] +; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], ; FORCE_REDUCTION-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef ; FORCE_REDUCTION-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] ; FORCE_REDUCTION-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] diff --git a/test/Transforms/SLPVectorizer/X86/PR40310.ll b/test/Transforms/SLPVectorizer/X86/PR40310.ll index ad1434146a5b..2a0b66ee2817 100644 --- a/test/Transforms/SLPVectorizer/X86/PR40310.ll +++ b/test/Transforms/SLPVectorizer/X86/PR40310.ll @@ -12,7 +12,7 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) { ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15 ; CHECK-NEXT: store atomic i32 [[TMP3]], i32* [[VALS:%.*]] unordered, align 4 -; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[V14:%.*]] = and i32 [[TMP2]], undef ; CHECK-NEXT: [[V16:%.*]] = and i32 undef, [[V14]] ; CHECK-NEXT: [[V18:%.*]] = and i32 undef, [[V16]] diff --git a/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/test/Transforms/SLPVectorizer/X86/alternate-int.ll index 2a49864ca1e4..a04beed1a45b 100644 --- a/test/Transforms/SLPVectorizer/X86/alternate-int.ll +++ b/test/Transforms/SLPVectorizer/X86/alternate-int.ll @@ -536,12 +536,12 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) { ; CHECK-LABEL: @add_sub_v8i32_splat( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[R7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP6]], <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R7]] ; diff --git a/test/Transforms/SLPVectorizer/X86/barriercall.ll b/test/Transforms/SLPVectorizer/X86/barriercall.ll index 7378b8bcb1c9..2ea29ed95c77 100644 --- a/test/Transforms/SLPVectorizer/X86/barriercall.ll +++ b/test/Transforms/SLPVectorizer/X86/barriercall.ll @@ -15,7 +15,7 @@ define i32 @foo(i32* nocapture %A, i32 %n) { ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], ; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[TMP3]], ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> , [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4 ; CHECK-NEXT: ret i32 undef diff --git a/test/Transforms/SLPVectorizer/X86/commutativity.ll b/test/Transforms/SLPVectorizer/X86/commutativity.ll index 9af59efd3453..ad566cb3411e 100644 --- a/test/Transforms/SLPVectorizer/X86/commutativity.ll +++ b/test/Transforms/SLPVectorizer/X86/commutativity.ll @@ -96,7 +96,7 @@ define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[B:%.*]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[C]], i32 2 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[A]], i32 3 -; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP9]], [[TMP12]] ; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16 ; CHECK-NEXT: ret void ; diff --git a/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/test/Transforms/SLPVectorizer/X86/compare-reduce.ll index ec29f8413ace..c16ac5385598 100644 --- a/test/Transforms/SLPVectorizer/X86/compare-reduce.ll +++ b/test/Transforms/SLPVectorizer/X86/compare-reduce.ll @@ -20,8 +20,8 @@ define void @reduce_compare(double* nocapture %A, i32 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> , [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> , [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 ; CHECK-NEXT: [[CMP11:%.*]] = fcmp ogt double [[TMP8]], [[TMP9]] diff --git a/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll index 47e89df5ab6c..550b8314d606 100644 --- a/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll +++ b/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll @@ -68,12 +68,12 @@ define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) { ; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0 ; AVX-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP1]], i32 1 ; AVX-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP5]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = fmul <2 x float> zeroinitializer, [[TMP0]] +; AVX-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer ; AVX-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[TMP9]], [[TMP8]] ; AVX-NEXT: [[TMP11:%.*]] = fcmp olt <2 x float> [[TMP10]], ; AVX-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP10]], <2 x float> ; AVX-NEXT: [[TMP13:%.*]] = fcmp olt <2 x float> [[TMP12]], -; AVX-NEXT: [[TMP14:%.*]] = fmul <2 x float> zeroinitializer, [[TMP12]] +; AVX-NEXT: [[TMP14:%.*]] = fmul <2 x float> [[TMP12]], zeroinitializer ; AVX-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP13]], <2 x float> , <2 x float> [[TMP14]] ; AVX-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP15]], i32 0 ; AVX-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP15]], i32 1 diff --git a/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll b/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll index f12de2ad199c..8f57a820197e 100644 --- a/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll +++ b/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll @@ -99,7 +99,7 @@ define void @zot(%struct.hoge* %arg) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[TMP]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], undef ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_HOGE:%.*]], %struct.hoge* [[ARG:%.*]], i64 0, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], undef ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef ; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TMP7]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 diff --git a/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll index e2d36376f5ea..5c753091f95c 100644 --- a/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll +++ b/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll @@ -31,10 +31,10 @@ define void @main() #0 { ; CHECK: cond.false66.us: ; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, undef ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[ADD_I276_US]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double 0xBFA5CC2D1960285F, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> , [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> , [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> , [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double undef, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> undef, [[TMP2]] ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[AGG_TMP99208_SROA_0_0_IDX]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP6]], align 8 diff --git a/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll b/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll index 98db3edd90ea..a3d98e39ce1b 100644 --- a/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll +++ b/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll @@ -22,7 +22,7 @@ define i32 @foo(double* nocapture %A, float* nocapture %B, i32 %g) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <2 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> [[TMP1]], ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[G:%.*]], 0 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: diff --git a/test/Transforms/SLPVectorizer/X86/cse.ll b/test/Transforms/SLPVectorizer/X86/cse.ll index 5860a24906be..d2512dcd615f 100644 --- a/test/Transforms/SLPVectorizer/X86/cse.ll +++ b/test/Transforms/SLPVectorizer/X86/cse.ll @@ -18,20 +18,21 @@ define i32 @test(double* nocapture %G) { ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[G]], i64 6 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> , [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> , [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[G]], i64 1 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[G]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 0 -; CHECK-NEXT: [[ADD8:%.*]] = fadd double [[TMP5]], 7.000000e+00 ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[G]], i64 2 -; CHECK-NEXT: store double [[ADD8]], double* [[ARRAYIDX9]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: [[MUL11:%.*]] = fmul double [[TMP6]], 4.000000e+00 -; CHECK-NEXT: [[ADD12:%.*]] = fadd double [[MUL11]], 8.000000e+00 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> undef, double [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[MUL11]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[G]], i64 3 -; CHECK-NEXT: store double [[ADD12]], double* [[ARRAYIDX13]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast double* [[ARRAYIDX9]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8 ; CHECK-NEXT: ret i32 undef ; entry: @@ -72,13 +73,13 @@ define i32 @foo(double* nocapture %A, i32 %n) { ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[A]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to <4 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> undef, double [[CONV]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[CONV]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[CONV]], i32 2 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[CONV]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP6]], [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> , [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> [[TMP7]], ; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[A]] to <4 x double>* ; CHECK-NEXT: store <4 x double> [[TMP8]], <4 x double>* [[TMP9]], align 8 ; CHECK-NEXT: ret i32 undef @@ -135,7 +136,7 @@ define i32 @test2(double* nocapture %G, i32 %k) { ; CHECK-NEXT: [[TMP8:%.*]] = fmul double [[TMP7]], 3.000000e+00 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> undef, double [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> , [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, double* [[G]], i64 1 ; CHECK-NEXT: [[TMP13:%.*]] = bitcast double* [[G]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP13]], align 8 @@ -146,7 +147,7 @@ define i32 @test2(double* nocapture %G, i32 %k) { ; CHECK-NEXT: [[TMP18:%.*]] = fmul double [[TMP17]], 3.000000e+00 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> undef, double [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x double> [[TMP19]], double [[TMP18]], i32 1 -; CHECK-NEXT: [[TMP21:%.*]] = fadd <2 x double> , [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = fadd <2 x double> [[TMP20]], ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, double* [[G]], i64 3 ; CHECK-NEXT: [[TMP23:%.*]] = bitcast double* [[TMP15]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP21]], <2 x double>* [[TMP23]], align 8 @@ -203,13 +204,13 @@ define i32 @foo4(double* nocapture %A, i32 %n) { ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[A]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to <4 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> undef, double [[CONV]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[CONV]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[CONV]], i32 2 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[CONV]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP6]], [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> , [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> [[TMP7]], ; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[A]] to <4 x double>* ; CHECK-NEXT: store <4 x double> [[TMP8]], <4 x double>* [[TMP9]], align 8 ; CHECK-NEXT: ret i32 undef diff --git a/test/Transforms/SLPVectorizer/X86/cycle_dup.ll b/test/Transforms/SLPVectorizer/X86/cycle_dup.ll index ac6933304780..2ba0a15fed22 100644 --- a/test/Transforms/SLPVectorizer/X86/cycle_dup.ll +++ b/test/Transforms/SLPVectorizer/X86/cycle_dup.ll @@ -24,7 +24,7 @@ define i32 @foo(i32* nocapture %A) #0 { ; CHECK: for.body: ; CHECK-NEXT: [[I_029:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x i32> [ [[TMP4:%.*]], [[FOR_BODY]] ], [ [[TMP1]], [[ENTRY]] ] -; CHECK-NEXT: [[TMP4]] = mul nsw <4 x i32> , [[TMP3]] +; CHECK-NEXT: [[TMP4]] = mul nsw <4 x i32> [[TMP3]], ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_029]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[TMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] diff --git a/test/Transforms/SLPVectorizer/X86/external_user.ll b/test/Transforms/SLPVectorizer/X86/external_user.ll index 8ee644f939ba..1e47f7a51fd1 100644 --- a/test/Transforms/SLPVectorizer/X86/external_user.ll +++ b/test/Transforms/SLPVectorizer/X86/external_user.ll @@ -32,9 +32,9 @@ define double @ext_user(double* noalias nocapture %B, double* noalias nocapture ; CHECK: for.body: ; CHECK-NEXT: [[I_020:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> , [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> , [[TMP3]] -; CHECK-NEXT: [[TMP5]] = fadd <2 x double> , [[TMP4]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], +; CHECK-NEXT: [[TMP5]] = fadd <2 x double> [[TMP4]], ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_020]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] diff --git a/test/Transforms/SLPVectorizer/X86/extract.ll b/test/Transforms/SLPVectorizer/X86/extract.ll index 24cf83ca405d..9a741cbb4cfd 100644 --- a/test/Transforms/SLPVectorizer/X86/extract.ll +++ b/test/Transforms/SLPVectorizer/X86/extract.ll @@ -8,7 +8,7 @@ define void @fextr(double* %ptr) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* undef ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> , [[LD]] +; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[LD]], ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P0]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP0]], <2 x double>* [[TMP1]], align 4 ; CHECK-NEXT: ret void @@ -32,7 +32,7 @@ define void @fextr1(double* %ptr) { ; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* undef ; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x double> [[LD]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> , [[REORDER_SHUFFLE]] +; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[REORDER_SHUFFLE]], ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P1]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP0]], <2 x double>* [[TMP1]], align 4 ; CHECK-NEXT: ret void @@ -59,7 +59,7 @@ define void @fextr2(double* %ptr) { ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[V0]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[P0]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 4 ; CHECK-NEXT: ret void diff --git a/test/Transforms/SLPVectorizer/X86/extractcost.ll b/test/Transforms/SLPVectorizer/X86/extractcost.ll index c9fae4460e57..834f5a089b50 100644 --- a/test/Transforms/SLPVectorizer/X86/extractcost.ll +++ b/test/Transforms/SLPVectorizer/X86/extractcost.ll @@ -14,7 +14,7 @@ define i32 @foo(i32* nocapture %A, i32 %n, i32 %m) { ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], ; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[TMP3]], ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> , [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0 diff --git a/test/Transforms/SLPVectorizer/X86/hoist.ll b/test/Transforms/SLPVectorizer/X86/hoist.ll index 885d11acfa15..5abf85f319b9 100644 --- a/test/Transforms/SLPVectorizer/X86/hoist.ll +++ b/test/Transforms/SLPVectorizer/X86/hoist.ll @@ -25,7 +25,7 @@ define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) { ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_024]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[SHUFFLE]], [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[SHUFFLE]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: [[ADD10]] = add nsw i32 [[I_024]], 4 diff --git a/test/Transforms/SLPVectorizer/X86/horizontal.ll b/test/Transforms/SLPVectorizer/X86/horizontal.ll index 986da9fa52b9..311d8a476c71 100644 --- a/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -36,7 +36,7 @@ define i32 @add_red(float* %A, i32 %n) { ; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], ; CHECK-NEXT: [[ADD6:%.*]] = fadd fast float undef, undef ; CHECK-NEXT: [[ADD11:%.*]] = fadd fast float [[ADD6]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> @@ -76,7 +76,7 @@ define i32 @add_red(float* %A, i32 %n) { ; STORE-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]] ; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* ; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; STORE-NEXT: [[TMP3:%.*]] = fmul <4 x float> , [[TMP2]] +; STORE-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], ; STORE-NEXT: [[ADD6:%.*]] = fadd fast float undef, undef ; STORE-NEXT: [[ADD11:%.*]] = fadd fast float [[ADD6]], undef ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> diff --git a/test/Transforms/SLPVectorizer/X86/in-tree-user.ll b/test/Transforms/SLPVectorizer/X86/in-tree-user.ll index 493e09a1d609..7e0cfb77c6b7 100644 --- a/test/Transforms/SLPVectorizer/X86/in-tree-user.ll +++ b/test/Transforms/SLPVectorizer/X86/in-tree-user.ll @@ -21,8 +21,8 @@ define void @in_tree_user(double* nocapture %A, i32 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> , [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> , [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 ; CHECK-NEXT: [[INTREEUSER:%.*]] = fadd double [[TMP8]], [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 diff --git a/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll b/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll index 429ad84f8a6a..2a4d457f1063 100644 --- a/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll +++ b/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll @@ -132,7 +132,7 @@ define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readon ; CHECK-NEXT: [[TMP24:%.*]] = icmp ult <16 x i8> [[TMP17]], [[TMP19]] ; CHECK-NEXT: [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x i8> [[TMP23]], <16 x i8> [[TMP21]] ; CHECK-NEXT: [[TMP26:%.*]] = zext <16 x i8> [[TMP25]] to <16 x i32> -; CHECK-NEXT: [[TMP27:%.*]] = mul <16 x i32> [[TMP15]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = mul <16 x i32> [[TMP26]], [[TMP15]] ; CHECK-NEXT: [[TMP28:%.*]] = trunc <16 x i32> [[TMP27]] to <16 x i8> ; CHECK-NEXT: [[ARRAYIDX188:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 15 ; CHECK-NEXT: [[TMP29:%.*]] = bitcast i8* [[E_ADDR_0354]] to <16 x i8>* @@ -413,52 +413,52 @@ define i32 @foo1() local_unnamed_addr #0 { ; CHECK-LABEL: @foo1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([64 x i32]* @ib to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> , [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], ; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([64 x i32]* @ia to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> , [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> , [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> , [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], ; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i32> , [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], ; CHECK-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> , [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], ; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> , [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], ; CHECK-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP16:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i32> , [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], ; CHECK-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP18:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP19:%.*]] = xor <4 x i32> , [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], ; CHECK-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP20:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP21:%.*]] = xor <4 x i32> , [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], ; CHECK-NEXT: store <4 x i32> [[TMP21]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP23:%.*]] = xor <4 x i32> , [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], ; CHECK-NEXT: store <4 x i32> [[TMP23]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP24:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP25:%.*]] = xor <4 x i32> , [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], ; CHECK-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP26:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP27:%.*]] = xor <4 x i32> , [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], ; CHECK-NEXT: store <4 x i32> [[TMP27]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP28:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP29:%.*]] = xor <4 x i32> , [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], ; CHECK-NEXT: store <4 x i32> [[TMP29]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP30:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP31:%.*]] = xor <4 x i32> , [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], ; CHECK-NEXT: store <4 x i32> [[TMP31]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60) to <4 x i32>*), align 16 ; CHECK-NEXT: br label [[FOR_BODY5:%.*]] ; CHECK: for.cond3: diff --git a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index e26eeec63087..5f6e8f143f4e 100644 --- a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -627,7 +627,7 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> , [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> [[TMP4]], ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP6]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP5]], i32 1 @@ -645,7 +645,7 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) { ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2 ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3 ; ZEROTHRESH-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], -; ZEROTHRESH-NEXT: [[TMP5:%.*]] = fmul <4 x double> , [[TMP4]] +; ZEROTHRESH-NEXT: [[TMP5:%.*]] = fmul <4 x double> [[TMP4]], ; ZEROTHRESH-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0 ; ZEROTHRESH-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP6]], i32 3 ; ZEROTHRESH-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP5]], i32 1 diff --git a/test/Transforms/SLPVectorizer/X86/long_chains.ll b/test/Transforms/SLPVectorizer/X86/long_chains.ll index 99b340addb92..ffbdd9f1d148 100644 --- a/test/Transforms/SLPVectorizer/X86/long_chains.ll +++ b/test/Transforms/SLPVectorizer/X86/long_chains.ll @@ -11,22 +11,22 @@ define i32 @test(double* nocapture %A, i8* nocapture %B) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[B:%.*]] to <2 x i8>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i8> undef, i8 [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i8> [[TMP4]], i8 [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <2 x i8> [[TMP6]] to <2 x double> ; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> , [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], ; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> , [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], ; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> , [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], ; CHECK-NEXT: [[TMP14:%.*]] = fmul <2 x double> [[TMP13]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> , [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> [[TMP14]], ; CHECK-NEXT: [[TMP16:%.*]] = fmul <2 x double> [[TMP15]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x double> , [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x double> [[TMP16]], ; CHECK-NEXT: [[TMP18:%.*]] = bitcast double* [[A:%.*]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[TMP18]], align 8 ; CHECK-NEXT: ret i32 undef diff --git a/test/Transforms/SLPVectorizer/X86/loopinvariant.ll b/test/Transforms/SLPVectorizer/X86/loopinvariant.ll index 1b19aeae0377..020b50d54632 100644 --- a/test/Transforms/SLPVectorizer/X86/loopinvariant.ll +++ b/test/Transforms/SLPVectorizer/X86/loopinvariant.ll @@ -36,7 +36,7 @@ define i32 @foo(i32* nocapture %A, i32 %n) { ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x i32> [[TMP13]], i32 [[N]], i32 5 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i32> [[TMP14]], i32 [[N]], i32 6 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i32> [[TMP15]], i32 [[N]], i32 7 -; CHECK-NEXT: [[TMP17:%.*]] = add nsw <8 x i32> [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP17:%.*]] = add nsw <8 x i32> [[TMP8]], [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>* ; CHECK-NEXT: store <8 x i32> [[TMP17]], <8 x i32>* [[TMP18]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 8 diff --git a/test/Transforms/SLPVectorizer/X86/multi_block.ll b/test/Transforms/SLPVectorizer/X86/multi_block.ll index d0216103d42a..f785926cc412 100644 --- a/test/Transforms/SLPVectorizer/X86/multi_block.ll +++ b/test/Transforms/SLPVectorizer/X86/multi_block.ll @@ -26,10 +26,10 @@ define i32 @bar(double* nocapture %A, i32 %d) { ; CHECK-NEXT: br i1 [[TMP4]], label [[TMP7:%.*]], label [[TMP5:%.*]] ; CHECK: [[TMP6:%.*]] = tail call i32 (...) @foo() ; CHECK-NEXT: br label [[TMP7]] -; CHECK: [[TMP8:%.*]] = fadd <2 x float> , [[TMP3]] +; CHECK: [[TMP8:%.*]] = fadd <2 x float> [[TMP3]], ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, double* [[A]], i64 8 ; CHECK-NEXT: [[TMP10:%.*]] = fpext <2 x float> [[TMP8]] to <2 x double> -; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> , [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], ; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP9]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP12]], align 8 ; CHECK-NEXT: ret i32 undef diff --git a/test/Transforms/SLPVectorizer/X86/multi_user.ll b/test/Transforms/SLPVectorizer/X86/multi_user.ll index ce8594ea84d7..9268adf9481c 100644 --- a/test/Transforms/SLPVectorizer/X86/multi_user.ll +++ b/test/Transforms/SLPVectorizer/X86/multi_user.ll @@ -19,7 +19,7 @@ define i32 @foo(i32* nocapture %A, i32 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP1]], i32 2 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> , [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP8]] diff --git a/test/Transforms/SLPVectorizer/X86/operandorder.ll b/test/Transforms/SLPVectorizer/X86/operandorder.ll index 2354ebd29879..1b959f19883a 100644 --- a/test/Transforms/SLPVectorizer/X86/operandorder.ll +++ b/test/Transforms/SLPVectorizer/X86/operandorder.ll @@ -14,7 +14,7 @@ define void @shuffle_operands1(double * noalias %from, double * noalias %to, ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[V1:%.*]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[V2:%.*]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 4 ; CHECK-NEXT: ret void diff --git a/test/Transforms/SLPVectorizer/X86/phi.ll b/test/Transforms/SLPVectorizer/X86/phi.ll index a0a13b2b5aac..fe604e2652d2 100644 --- a/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/test/Transforms/SLPVectorizer/X86/phi.ll @@ -81,9 +81,9 @@ define i32 @foo2(double* noalias nocapture %B, double* noalias nocapture %A, i32 ; CHECK: for.body: ; CHECK-NEXT: [[I_019:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> , [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> , [[TMP3]] -; CHECK-NEXT: [[TMP5]] = fadd <2 x double> , [[TMP4]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], +; CHECK-NEXT: [[TMP5]] = fadd <2 x double> [[TMP4]], ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_019]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] @@ -150,9 +150,9 @@ define float @foo3(float* nocapture readonly %A) #0 { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi float [ [[TMP3]], [[ENTRY]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[REORDER_SHUFFLE]], [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi float [ [[TMP3]], [[ENTRY]] ], [ [[TMP11:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[REORDER_SHUFFLE]], [[ENTRY]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], 7.000000e+00 ; CHECK-NEXT: [[ADD6]] = fadd float [[R_052]], [[MUL]] ; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[INDVARS_IV]], 2 @@ -163,27 +163,26 @@ define float @foo3(float* nocapture readonly %A) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[ARRAYIDX19]] to <2 x float>* ; CHECK-NEXT: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[TMP9]], align 4 ; CHECK-NEXT: [[REORDER_SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> undef, <2 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> , float [[TMP4]], i32 3 -; CHECK-NEXT: [[TMP12]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> undef, float [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP14]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP8]], i32 2 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP16]], float 8.000000e+00, i32 3 -; CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[TMP11]], [[TMP17]] -; CHECK-NEXT: [[TMP19]] = fadd <4 x float> [[TMP6]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP20]], 121 +; CHECK-NEXT: [[TMP11]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> undef, float [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP13]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP8]], i32 2 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP4]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = fmul <4 x float> [[TMP16]], +; CHECK-NEXT: [[TMP18]] = fadd <4 x float> [[TMP6]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP19]], 121 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP19]], i32 3 -; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP19]], i32 2 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP22]] -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[TMP19]], i32 1 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP23]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x float> [[TMP19]], i32 0 -; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP24]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP18]], i32 3 +; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP18]], i32 2 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP18]], i32 1 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[TMP18]], i32 0 +; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP23]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: @@ -255,7 +254,7 @@ define float @sort_phi_type(float* nocapture readonly %A) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP5]], i32 2 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP9]] = fmul <4 x float> , [[TMP8]] +; CHECK-NEXT: [[TMP9]] = fmul <4 x float> [[TMP8]], ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], 128 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] diff --git a/test/Transforms/SLPVectorizer/X86/pr35497.ll b/test/Transforms/SLPVectorizer/X86/pr35497.ll index c6989c384e01..bdb37b28d58c 100644 --- a/test/Transforms/SLPVectorizer/X86/pr35497.ll +++ b/test/Transforms/SLPVectorizer/X86/pr35497.ll @@ -55,7 +55,7 @@ define void @pr35497() local_unnamed_addr #0 { ; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 5 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> undef, i64 [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], ; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 1 @@ -63,7 +63,7 @@ define void @pr35497() local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> undef, i64 [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[ADD]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i64> , [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i64> [[TMP8]], ; CHECK-NEXT: [[ARRAYIDX2_6:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[ARRAYIDX2_6]] to <2 x i64>* ; CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[TMP10]], align 1 diff --git a/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll b/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll index 7cc0194c7302..380f58fe5dc8 100644 --- a/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll +++ b/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll @@ -88,7 +88,7 @@ define void @nsw(i32* %x) { ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret void @@ -124,7 +124,7 @@ define void @not_nsw(i32* %x) { ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret void @@ -160,7 +160,7 @@ define void @nuw(i32* %x) { ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret void @@ -196,7 +196,7 @@ define void @not_nuw(i32* %x) { ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret void @@ -232,7 +232,7 @@ define void @nnan(float* %x) { ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fadd nnan <4 x float> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd nnan <4 x float> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4 ; CHECK-NEXT: ret void @@ -268,7 +268,7 @@ define void @not_nnan(float* %x) { ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4 ; CHECK-NEXT: ret void @@ -304,7 +304,7 @@ define void @only_fast(float* %x) { ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4 ; CHECK-NEXT: ret void @@ -340,7 +340,7 @@ define void @only_arcp(float* %x) { ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fadd arcp <4 x float> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd arcp <4 x float> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4 ; CHECK-NEXT: ret void diff --git a/test/Transforms/SLPVectorizer/X86/reduction.ll b/test/Transforms/SLPVectorizer/X86/reduction.ll index 03b7f67ae4ca..e9f8e7f7c884 100644 --- a/test/Transforms/SLPVectorizer/X86/reduction.ll +++ b/test/Transforms/SLPVectorizer/X86/reduction.ll @@ -23,7 +23,7 @@ define i32 @reduce(double* nocapture %A, i32 %n, i32 %m) { ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i32 [[MUL]] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP2]], i32 1 ; CHECK-NEXT: [[ADD5:%.*]] = fadd double [[TMP3]], [[TMP4]] diff --git a/test/Transforms/SLPVectorizer/X86/reduction_loads.ll b/test/Transforms/SLPVectorizer/X86/reduction_loads.ll index 47a6a44611d8..0f0bbf9a2ad2 100644 --- a/test/Transforms/SLPVectorizer/X86/reduction_loads.ll +++ b/test/Transforms/SLPVectorizer/X86/reduction_loads.ll @@ -14,10 +14,10 @@ define i32 @test(i32* nocapture readonly %p) { ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], ; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]] ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]] ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]] @@ -32,11 +32,11 @@ define i32 @test(i32* nocapture readonly %p) { ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; CHECK-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], [[SUM]] +; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[SUM]] ; CHECK-NEXT: [[ADD_7:%.*]] = add i32 undef, [[ADD_6]] ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: ret i32 [[BIN_EXTRA]] +; CHECK-NEXT: ret i32 [[OP_EXTRA]] ; entry: %arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1 diff --git a/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll index d1c7e6e851f5..13884efd98dd 100644 --- a/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll +++ b/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll @@ -18,7 +18,7 @@ define void @hoge() { ; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <2 x i32> , [[REORDER_SHUFFLE]] ; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef ; CHECK-NEXT: [[SHUFFLE8:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> , [[SHUFFLE8]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE8]], ; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 undef, undef ; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 undef, i32 undef ; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], undef @@ -37,7 +37,7 @@ define void @hoge() { ; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP2]] ; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 undef, undef ; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 undef, i32 undef ; CHECK-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP27]], undef diff --git a/test/Transforms/SLPVectorizer/X86/resched.ll b/test/Transforms/SLPVectorizer/X86/resched.ll index b8b1ff00db41..28bc95e2f4ca 100644 --- a/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/test/Transforms/SLPVectorizer/X86/resched.ll @@ -72,7 +72,7 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv() ; CHECK-NEXT: [[TMP41:%.*]] = insertelement <16 x i32> [[TMP40]], i32 [[SHR_13_I_I]], i32 14 ; CHECK-NEXT: [[TMP42:%.*]] = insertelement <16 x i32> [[TMP41]], i32 [[SHR_14_I_I]], i32 15 ; CHECK-NEXT: [[TMP43:%.*]] = trunc <16 x i32> [[TMP42]] to <16 x i8> -; CHECK-NEXT: [[TMP44:%.*]] = and <16 x i8> , [[TMP43]] +; CHECK-NEXT: [[TMP44:%.*]] = and <16 x i8> [[TMP43]], ; CHECK-NEXT: [[ARRAYIDX_I_I7_15_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 15 ; CHECK-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* ; CHECK-NEXT: store <16 x i8> [[TMP44]], <16 x i8>* [[TMP45]], align 1 diff --git a/test/Transforms/SLPVectorizer/X86/saxpy.ll b/test/Transforms/SLPVectorizer/X86/saxpy.ll index f2f858e3c7dd..7e9109a4ef00 100644 --- a/test/Transforms/SLPVectorizer/X86/saxpy.ll +++ b/test/Transforms/SLPVectorizer/X86/saxpy.ll @@ -15,7 +15,7 @@ define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a, ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[A]], i32 2 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[A]], i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* ; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP9]], [[TMP11]] diff --git a/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll b/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll index 3abde37048fd..bff947e28cae 100644 --- a/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll +++ b/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll @@ -12,7 +12,7 @@ define i32 @slp_schedule_bundle() local_unnamed_addr #0 { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([1 x i32]* @b to <4 x i32>*), align 4 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([1 x i32]* @a to <4 x i32>*), align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr ([1 x i32], [1 x i32]* @b, i64 4, i64 0), align 4 ; CHECK-NEXT: [[DOTLOBIT_4:%.*]] = lshr i32 [[TMP3]], 31 diff --git a/test/Transforms/SLPVectorizer/X86/simple-loop.ll b/test/Transforms/SLPVectorizer/X86/simple-loop.ll index 975a1af7576a..59b94cad17e4 100644 --- a/test/Transforms/SLPVectorizer/X86/simple-loop.ll +++ b/test/Transforms/SLPVectorizer/X86/simple-loop.ll @@ -14,8 +14,8 @@ define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i6 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> , [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> , [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP9]], align 4 diff --git a/test/Transforms/SLPVectorizer/X86/value-bug.ll b/test/Transforms/SLPVectorizer/X86/value-bug.ll index c2f4b981af9e..78df5a1d68dd 100644 --- a/test/Transforms/SLPVectorizer/X86/value-bug.ll +++ b/test/Transforms/SLPVectorizer/X86/value-bug.ll @@ -33,9 +33,9 @@ define void @test() { ; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]] ; CHECK: exit: ; CHECK-NEXT: [[TMP9:%.*]] = fpext <2 x float> [[TMP3]] to <2 x double> -; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> , [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], ; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> undef, [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> undef, [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[TMP11]], undef ; CHECK-NEXT: [[TMP13]] = fptrunc <2 x double> [[TMP12]] to <2 x float> ; CHECK-NEXT: br label [[BB283]] ; diff --git a/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll b/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll index 2b593b78652f..8bf3f362f03c 100644 --- a/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll +++ b/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll @@ -12,7 +12,7 @@ define void @add0(i32* noalias %dst, i32* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -136,7 +136,7 @@ define void @sub1(i32* noalias %dst, i32* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -174,7 +174,7 @@ define void @sub2(i32* noalias %dst, i32* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -422,7 +422,7 @@ define void @add0f(float* noalias %dst, float* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -546,7 +546,7 @@ define void @sub1f(float* noalias %dst, float* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -584,7 +584,7 @@ define void @sub2f(float* noalias %dst, float* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -751,7 +751,7 @@ define void @add0fn(float* noalias %dst, float* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -875,7 +875,7 @@ define void @sub1fn(float* noalias %dst, float* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -913,7 +913,7 @@ define void @sub2fn(float* noalias %dst, float* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void diff --git a/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll b/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll index fd23381949d7..889bba80b7a4 100644 --- a/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll +++ b/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll @@ -17,7 +17,7 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7 -; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[TMP9]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 undef, undef ; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef ; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], undef @@ -91,7 +91,7 @@ define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7 -; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[TMP9]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 undef, undef ; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef ; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], undef @@ -169,7 +169,7 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7 -; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[TMP9]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 undef, undef ; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef ; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], undef diff --git a/test/tools/llvm-objcopy/ELF/preserve-segment-contents-ehdr-phdrs.test b/test/tools/llvm-objcopy/ELF/preserve-segment-contents-ehdr-phdrs.test new file mode 100644 index 000000000000..9dc63d753f60 --- /dev/null +++ b/test/tools/llvm-objcopy/ELF/preserve-segment-contents-ehdr-phdrs.test @@ -0,0 +1,41 @@ +## Show that llvm-objcopy correctly updates the elf header and program header +## table when they are within a segment. + +# RUN: yaml2obj %s -o %t.in +## Validate that the properties are different before the removal. +# RUN: llvm-readobj --file-headers --program-headers %t.in | FileCheck %s --check-prefix=BEFORE +# RUN: llvm-objcopy %t.in %t.out -R .remove_me +# RUN: llvm-readobj --file-headers --program-headers %t.out | FileCheck %s --check-prefix=AFTER + +# BEFORE: SectionHeaderCount: 6 +# BEFORE: Type: PT_LOAD +# BEFORE-NEXT: Offset: 0x0 +# BEFORE: Type: PT_LOAD +# BEFORE-NEXT: Offset: 0x240 + +# AFTER: SectionHeaderCount: 5 +# AFTER: Type: PT_LOAD +# AFTER-NEXT: Offset: 0x0 +# AFTER: Type: PT_LOAD +# AFTER-NEXT: Offset: 0xB0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .remove_me + Type: SHT_PROGBITS + Size: 0x10 + - Name: .keep_me + Type: SHT_PROGBITS + Size: 0x10 +ProgramHeaders: + - Type: PT_LOAD + Offset: 0 + FileSize: 176 # sizeof(Elf64_Ehdr) + 2 * sizeof(Elf64_Phdr) + - Type: PT_LOAD + Sections: + - Section: .keep_me diff --git a/test/tools/llvm-objcopy/ELF/preserve-segment-contents.test b/test/tools/llvm-objcopy/ELF/preserve-segment-contents.test new file mode 100644 index 000000000000..4de3ac9700e5 --- /dev/null +++ b/test/tools/llvm-objcopy/ELF/preserve-segment-contents.test @@ -0,0 +1,639 @@ +# We want to preserve areas in segments that are not covered by section headers. +# This test shows that we do this for areas at the start of a segment, between +# sections in a segment, and after all sections in a segment. +# To create inputs with arbitrary data in segments, not covered by sections, we +# use yaml2obj to create segments with sections covering all areas, then remove +# some sections in those segments, and finally write over the areas of the +# removed sections using python. + +# blob* sections are the sections that will be removed to create unlabelled +# areas and then overwritten with data to show we preserve the data. + +# RUN: yaml2obj %s -o %t.base +# RUN: llvm-objcopy %t.base %t.stripped --regex -R blob.* +# Show that the removal leaves the bytes as zeroes, as desired, for all our +# test cases. +# RUN: od -t x1 -j 0x2000 -N 24 %t.stripped | FileCheck %s --check-prefix=CHECK1 -DPATTERN="00 00 00 00" +# RUN: od -t x1 -j 0x2100 -N 12 %t.stripped | FileCheck %s --check-prefix=CHECK2 -DPATTERN="00 00 00 00" +# RUN: od -t x1 -j 0x2200 -N 4 %t.stripped | FileCheck %s --check-prefix=CHECK3 -DPATTERN="00 00 00 00" +# RUN: od -t x1 -j 0x2300 -N 12 %t.stripped | FileCheck %s --check-prefix=CHECK4 -DPATTERN="00 00 00 00" +# RUN: od -t x1 -j 0x3000 -N 68 %t.stripped | FileCheck %s --check-prefix=CHECK5 -DPATTERN="00 00 00 00" +# RUN: od -t x1 -j 0x4000 -N 60 %t.stripped | FileCheck %s --check-prefix=CHECK6 -DPATTERN="00 00 00 00" +# RUN: od -t x1 -j 0x5000 -N 60 %t.stripped | FileCheck %s --check-prefix=CHECK7 -DPATTERN="00 00 00 00" + +# RUN: cp %t.stripped %t.in +# RUN: echo "with open('%/t.in', 'r+') as input:" > %t.py +# RUN: echo " for offset in [" >> %t.py +# RUN: echo " 0x2000, 0x2008, 0x200C, 0x2014, 0x2104, 0x2300," >> %t.py +# RUN: echo " 0x3008, 0x3010, 0x3018, 0x3020, 0x3028, 0x302C, 0x3034, 0x303C," >> %t.py +# RUN: echo " 0x4000, 0x4008, 0x4010, 0x4014, 0x401C, 0x4024, 0x4034," >> %t.py +# RUN: echo " 0x5000, 0x5008, 0x5010, 0x501C, 0x5024, 0x502C, 0x5030, 0x5038]:" >> %t.py +# RUN: echo " input.seek(offset)" >> %t.py +# RUN: echo " input.write('\xDE\xAD\xBE\xEF')" >> %t.py +# RUN: %python %t.py +# RUN: llvm-objcopy %t.in %t.out +# RUN: od -t x1 -j 0x2000 -N 24 %t.out | FileCheck %s --check-prefix=CHECK1 -DPATTERN="de ad be ef" +# RUN: od -t x1 -j 0x2100 -N 12 %t.out | FileCheck %s --check-prefix=CHECK2 -DPATTERN="de ad be ef" +# RUN: od -t x1 -j 0x2200 -N 4 %t.out | FileCheck %s --check-prefix=CHECK3 -DPATTERN="de ad be ef" +# RUN: od -t x1 -j 0x2300 -N 12 %t.out | FileCheck %s --check-prefix=CHECK4 -DPATTERN="de ad be ef" +# RUN: od -t x1 -j 0x3000 -N 68 %t.out | FileCheck %s --check-prefix=CHECK5 -DPATTERN="de ad be ef" +# RUN: od -t x1 -j 0x4000 -N 60 %t.out | FileCheck %s --check-prefix=CHECK6 -DPATTERN="de ad be ef" +# RUN: od -t x1 -j 0x5000 -N 60 %t.out | FileCheck %s --check-prefix=CHECK7 -DPATTERN="de ad be ef" + +# CHECK1: [[PATTERN]] 11 22 33 44 [[PATTERN]] [[PATTERN]] +# CHECK1-NEXT: 55 66 77 88 [[PATTERN]] +# CHECK2: 99 00 aa bb [[PATTERN]] cc dd ee ff +# CHECK3: fe fe fe fe +# CHECK4: [[PATTERN]] 00 00 00 00 00 00 00 00 +# CHECK5: ff ff ee ee dd dd cc cc [[PATTERN]] bb bb aa aa +# CHECK5-NEXT: [[PATTERN]] 00 00 99 99 [[PATTERN]] 88 88 77 77 +# CHECK5-NEXT: [[PATTERN]] 66 66 55 55 [[PATTERN]] [[PATTERN]] +# CHECK5-NEXT: 44 44 33 33 [[PATTERN]] 22 22 11 11 [[PATTERN]] +# CHECK5-NEXT: 00 11 22 33 +# CHECK6: [[PATTERN]] 44 55 66 77 [[PATTERN]] 88 99 aa bb +# CHECK6-NEXT: [[PATTERN]] [[PATTERN]] cc dd ee ff [[PATTERN]] +# CHECK6-NEXT: ff ee dd cc [[PATTERN]] bb aa 99 88 77 66 55 44 +# CHECK6-NEXT: 33 22 11 00 [[PATTERN]] 11 11 11 11 +# CHECK7: [[PATTERN]] 12 34 56 78 [[PATTERN]] 90 ab cd ef +# CHECK7-NEXT: [[PATTERN]] fe dc ba 09 87 65 43 21 [[PATTERN]] +# CHECK7-NEXT: 22 22 22 22 [[PATTERN]] 33 33 33 33 [[PATTERN]] +# CHECK7-NEXT: [[PATTERN]] 44 44 44 44 [[PATTERN]] + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: blob1 + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x2000 + AddressAlign: 0x2000 + - Name: section1 + Type: SHT_PROGBITS + Address: 0x2004 + Content: '11223344' + - Name: blob2 + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x2008 + - Name: section2 + Type: SHT_NOBITS + Size: 4 + Address: 0x200C + - Name: blob3 + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x2010 + - Name: section3 + Type: SHT_PROGBITS + Content: '55667788' + Address: 0x2014 + - Name: blob4 + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x2018 + - Name: section4 + Type: SHT_PROGBITS + Content: '9900aabb' + Address: 0x2100 + AddressAlign: 0x100 + - Name: blob5 + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x2104 + - Name: section5 + Type: SHT_PROGBITS + Address: 0x2108 + Content: 'ccddeeff' + - Name: section6 + Type: SHT_PROGBITS + Content: 'fefefefe' + Address: 0x2200 + AddressAlign: 0x100 + - Name: blob6 + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x2300 + AddressAlign: 0x100 + - Name: sectionA + Type: SHT_PROGBITS + Content: 'ffffeeee' + Address: 0x3000 + AddressAlign: 0x1000 + - Name: sectionB + Type: SHT_PROGBITS + Content: 'ddddcccc' + Address: 0x3004 + - Name: blobA + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x3008 + - Name: sectionC + Type: SHT_PROGBITS + Content: 'bbbbaaaa' + Address: 0x300C + - Name: blobB + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x3010 + - Name: sectionD + Type: SHT_PROGBITS + Content: '00009999' + Address: 0x3014 + - Name: blobC + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x3018 + - Name: sectionE + Type: SHT_PROGBITS + Content: '88887777' + Address: 0x301C + - Name: blobD + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x3020 + - Name: sectionF + Type: SHT_PROGBITS + Content: '66665555' + Address: 0x3024 + - Name: blobE + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x3028 + - Name: blobF + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x302C + - Name: sectionG + Type: SHT_PROGBITS + Content: '44443333' + Address: 0x3030 + - Name: blobG + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x3034 + - Name: sectionH + Type: SHT_PROGBITS + Content: '22221111' + Address: 0x3038 + - Name: blobH + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x303C + - Name: sectionI + Type: SHT_PROGBITS + Content: '00112233' + Address: 0x3040 + - Name: blobz + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x4000 + AddressAlign: 0x1000 + - Name: sectionz + Type: SHT_PROGBITS + Content: '44556677' + Address: 0x4004 + - Name: bloby + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x4008 + - Name: sectiony + Type: SHT_PROGBITS + Content: '8899aabb' + Address: 0x400C + - Name: blobx + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x4010 + - Name: blobw + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x4014 + - Name: sectionx + Type: SHT_PROGBITS + Content: 'ccddeeff' + Address: 0x4018 + - Name: blobv + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x401C + - Name: sectionw + Type: SHT_PROGBITS + Content: 'ffeeddcc' + Address: 0x4020 + - Name: blobu + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x4024 + - Name: sectionv + Type: SHT_PROGBITS + Content: 'bbaa9988' + Address: 0x4028 + - Name: sectionu + Type: SHT_PROGBITS + Content: '77665544' + Address: 0x402C + - Name: sectiont + Type: SHT_PROGBITS + Content: '33221100' + Address: 0x4030 + - Name: blobt + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x4034 + - Name: sections + Type: SHT_PROGBITS + Content: '11111111' + Address: 0x4038 + - Name: bloba + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x5000 + AddressAlign: 0x1000 + - Name: sectiona + Type: SHT_PROGBITS + Content: '12345678' + Address: 0x5004 + - Name: blobb + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x5008 + - Name: sectionb + Type: SHT_PROGBITS + Content: '90abcdef' + Address: 0x500C + - Name: blobc + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x5010 + - Name: sectionc + Type: SHT_PROGBITS + Content: 'fedcba09' + Address: 0x5014 + - Name: sectiond + Type: SHT_PROGBITS + Content: '87654321' + Address: 0x5018 + - Name: blobd + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x501C + - Name: sectione + Type: SHT_PROGBITS + Content: '22222222' + Address: 0x5020 + - Name: blobe + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x5024 + - Name: sectionf + Type: SHT_PROGBITS + Content: '33333333' + Address: 0x5028 + - Name: blobf + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x502C + - Name: blobg + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x5030 + - Name: sectiong + Type: SHT_PROGBITS + Content: '44444444' + Address: 0x5034 + - Name: blobh + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x5038 +ProgramHeaders: + # First segment has unlabelled space at start and end. + - Type: 0x6ABCDEF0 # Non-specific segment type. + VAddr: 0x2000 + PAddr: 0x2000 + Align: 0x2000 + Sections: + - Section: blob1 + - Section: section1 + - Section: blob2 + - Section: section2 # nobits + - Section: blob3 + - Section: section3 + - Section: blob4 + # Second segment has sections at start and end. + - Type: 0x6ABCDEF0 + VAddr: 0x2100 + PAddr: 0x2100 + Align: 0x100 + Sections: + - Section: section4 + - Section: blob5 + - Section: section5 + # Third segment is all covered by a section. + - Type: 0x6ABCDEF0 + VAddr: 0x2200 + PAddr: 0x2200 + Align: 0x100 + Sections: + - Section: section6 + # Fourth segment has no sections (after removing blob headers). + - Type: 0x6ABCDEF0 + VAddr: 0x2300 + PAddr: 0x2300 + Align: 0x100 + Sections: + - Section: blob6 + # Fifth segment is empty. + - Type: 0x6ABCDEF0 + VAddr: 0x2308 + PAddr: 0x2308 + Offset: 0x2308 + + # The next few segments test behaviour of fully nested segments. + # Sixth segment is the "parent" segment. + - Type: 0x6ABCDEF0 + VAddr: 0x3000 + PAddr: 0x3000 + Align: 0x1000 + Sections: + - Section: sectionA + - Section: sectionB + - Section: blobA + - Section: sectionC + - Section: blobB + - Section: sectionD + - Section: blobC + - Section: sectionE + - Section: blobD + - Section: sectionF + - Section: blobE + - Section: blobF + - Section: sectionG + - Section: blobG + - Section: sectionH + - Section: blobH + - Section: sectionI + # Seventh segment is empty and nested. + - Type: 0x6ABCDEF0 + VAddr: 0x3002 + PAddr: 0x3002 + Offset: 0x3002 + # Eighth segment contains only a section and is nested. + - Type: 0x6ABCDEF0 + VAddr: 0x3004 + PAddr: 0x3004 + Sections: + - Section: sectionB + # Ninth segment contains only unlabelled space and is nested. + - Type: 0x6ABCDEF0 + VAddr: 0x3008 + PAddr: 0x3008 + Sections: + - Section: blobA + # Tenth segment contains two sections with space between and is nested. + - Type: 0x6ABCDEF0 + VAddr: 0x300C + PAddr: 0x300C + Sections: + - Section: sectionC + - Section: blobB + - Section: sectionD + # Eleventh segment contains two sections with space between and at ends and is nested. + - Type: 0x6ABCDEF0 + VAddr: 0x3018 + PAddr: 0x3018 + Sections: + - Section: blobC + - Section: sectionE + - Section: blobD + - Section: sectionF + - Section: blobE + # Twelfth segment contains one section with space at ends adjacent to space in parent segment. + - Type: 0x6ABCDEF0 + VAddr: 0x302E + PAddr: 0x302E + Offset: 0x302E + FileSize: 8 + Sections: + - Section: sectionG + # Thirteenth segment contains overlaps sections at either end in parent segment. + - Type: 0x6ABCDEF0 + VAddr: 0x303A + PAddr: 0x303A + Offset: 0x303A + FileSize: 0x8 + Sections: + - Section: blobH + + # The next batch of segments are segments that only partially overlap other segments. + + # Segment14: |-unlabelled-|-Sec-| + # Segment15: |--|-Sec-|-unlabelled-| + - Type: 0x6ABCDEF0 + VAddr: 0x4000 + PAddr: 0x4000 + Sections: + - Section: blobz + - Section: sectionz + - Type: 0x6ABCDEF0 + VAddr: 0x4002 + PAddr: 0x4002 + Offset: 0x4002 + Sections: + - Section: sectionz + - Section: bloby + + # Segment16: |-Sec-|--| + # Segment17: |--|----unlabelled---| + - Type: 0x6ABCDEF0 + VAddr: 0x400C + PAddr: 0x400C + FileSize: 6 + Sections: + - Section: sectiony + - Type: 0x6ABCDEF0 + VAddr: 0x400E + PAddr: 0x400E + Offset: 0x400E + Sections: + - Section: blobx + + # Segment18: |-unlabelled-|-Sec-| + # Segment19: |-Sec-|-unlabelled-| + - Type: 0x6ABCDEF0 + VAddr: 0x4014 + PAddr: 0x4014 + Sections: + - Section: blobw + - Section: sectionx + - Type: 0x6ABCDEF0 + VAddr: 0x4018 + PAddr: 0x4018 + Sections: + - Section: sectionx + - Section: blobv + + # Segment20: |-Sec-| + # Segment21: |--|-unlabelled-|-Sec-| + - Type: 0x6ABCDEF0 + VAddr: 0x4020 + PAddr: 0x4020 + Sections: + - Section: sectionw + - Type: 0x6ABCDEF0 + VAddr: 0x4022 + PAddr: 0x4022 + Offset: 0x4022 + Sections: + - Section: blobu + - Section: sectionv + + # Segment22: |-Sec-| + # Segment23: |--|-Sec-| + - Type: 0x6ABCDEF0 + VAddr: 0x402C + PAddr: 0x402C + Sections: + - Section: sectionu + - Type: 0x6ABCDEF0 + VAddr: 0x402E + PAddr: 0x402E + Offset: 0x402E + Sections: + - Section: sectiont + + # Segment24: |-unlabelled-|--| + # Segment25: |--Sec--| + - Type: 0x6ABCDEF0 + VAddr: 0x4034 + PAddr: 0x4034 + FileSize: 6 + Sections: + - Section: blobt + - Type: 0x6ABCDEF0 + VAddr: 0x4038 + PAddr: 0x4038 + Sections: + - Section: sections + + # The next batch of segments represent groups of three nested/overlapping segments, + # with one parent segment containing two overlapping segments. + + # Segment26: |-unlabelled-|-Sec-|-unlabelled-| + # Segment27: |------------|--| + # Segment28: |-Sec-|------------| + - Type: 0x6ABCDEF0 + VAddr: 0x5000 + PAddr: 0x5000 + Align: 0x1000 + Sections: + - Section: bloba + - Section: sectiona + - Section: blobb + - Type: 0x6ABCDEF0 + VAddr: 0x5000 + PAddr: 0x5000 + FileSize: 6 + Sections: + - Section: bloba + - Type: 0x6ABCDEF0 + VAddr: 0x5004 + PAddr: 0x5004 + Sections: + - Section: sectiona + - Section: blobb + + # Segment29: |-Sec-|-unlabelled-|-Sec-| + # Segment30: |-Sec-|--------| + # Segment31: |---------|-Sec-| + - Type: 0x6ABCDEF0 + VAddr: 0x500C + PAddr: 0x500C + Sections: + - Section: sectionb + - Section: blobc + - Section: sectionc + - Type: 0x6ABCDEF0 + VAddr: 0x500C + PAddr: 0x500C + FileSize: 7 + Sections: + - Section: sectionb + - Type: 0x6ABCDEF0 + VAddr: 0x5011 + PAddr: 0x5011 + Offset: 0x5011 + Sections: + - Section: sectionc + + # Segment32: |-Sec-|-unlabelled-|-Sec-| + # Segment33: |-Sec-|------------| + # Segment34: |------------|-Sec-| + - Type: 0x6ABCDEF0 + VAddr: 0x5018 + PAddr: 0x5018 + Sections: + - Section: sectiond + - Section: blobd + - Section: sectione + - Type: 0x6ABCDEF0 + VAddr: 0x5018 + PAddr: 0x5018 + Sections: + - Section: sectiond + - Section: blobd + - Type: 0x6ABCDEF0 + VAddr: 0x501C + PAddr: 0x501C + Sections: + - Section: blobd + - Section: sectione + + # Segment35: |-unlabelled-|-Sec-|-unlabelled-| + # Segment36: |------------|-Sec-| + # Segment37: |-Sec-|------------| + - Type: 0x6ABCDEF0 + VAddr: 0x5024 + PAddr: 0x5024 + Sections: + - Section: blobe + - Section: sectionf + - Section: blobf + - Type: 0x6ABCDEF0 + VAddr: 0x5024 + PAddr: 0x5024 + Sections: + - Section: blobe + - Section: sectionf + - Type: 0x6ABCDEF0 + VAddr: 0x5028 + PAddr: 0x5028 + Sections: + - Section: sectionf + - Section: blobf + + # Segment38: |-unlabelled-|-Sec-|-unlabelled-| + # Segment39: |------------|---| + # Segment40: |---|------------| + - Type: 0x6ABCDEF0 + VAddr: 0x5030 + PAddr: 0x5030 + Sections: + - Section: blobg + - Section: sectiong + - Section: blobh + - Type: 0x6ABCDEF0 + VAddr: 0x5030 + PAddr: 0x5030 + FileSize: 7 + Sections: + - Section: blobg + - Type: 0x6ABCDEF0 + VAddr: 0x5035 + PAddr: 0x5035 + Offset: 0x5035 + Sections: + - Section: blobh diff --git a/test/tools/llvm-readobj/elf-versioninfo.test b/test/tools/llvm-readobj/elf-versioninfo.test index 82029cf82fd3..7ef599e1370b 100644 --- a/test/tools/llvm-readobj/elf-versioninfo.test +++ b/test/tools/llvm-readobj/elf-versioninfo.test @@ -1,106 +1,119 @@ // Test that llvm-readobj dumps version info tags correctly. -RUN: llvm-readobj -dynamic-table -V %p/Inputs/verdef.elf-x86-64 | FileCheck %s +RUN: llvm-readobj -dynamic-table -V %p/Inputs/verdef.elf-x86-64 | FileCheck %s --check-prefix=LLVM-VERDEF +RUN: llvm-readelf -dynamic-table -V %p/Inputs/verdef.elf-x86-64 | FileCheck %s --check-prefix=GNU-VERDEF -CHECK: 0x000000006FFFFFF0 VERSYM 0x24C -CHECK: 0x000000006FFFFFFC VERDEF 0x25C -CHECK: 0x000000006FFFFFFD VERDEFNUM 3 +LLVM-VERDEF: 0x000000006FFFFFF0 VERSYM 0x24C +LLVM-VERDEF: 0x000000006FFFFFFC VERDEF 0x25C +LLVM-VERDEF: 0x000000006FFFFFFD VERDEFNUM 3 -CHECK: Version symbols { -CHECK-NEXT: Section Name: .gnu.version (20) -CHECK-NEXT: Address: 0x24C -CHECK-NEXT: Offset: 0x24C -CHECK-NEXT: Link: 1 -CHECK-NEXT: Symbols [ -CHECK-NEXT: Symbol { -CHECK-NEXT: Version: 0 -CHECK-NEXT: Name: {{$}} -CHECK-NEXT: } -CHECK-NEXT: Symbol { -CHECK-NEXT: Version: 1 -CHECK-NEXT: Name: _end{{$}} -CHECK-NEXT: } -CHECK-NEXT: Symbol { -CHECK-NEXT: Version: 1 -CHECK-NEXT: Name: _edata{{$}} -CHECK-NEXT: } -CHECK-NEXT: Symbol { -CHECK-NEXT: Version: 3 -CHECK-NEXT: Name: goo@@VERSION2 -CHECK-NEXT: } -CHECK-NEXT: Symbol { -CHECK-NEXT: Version: 1 -CHECK-NEXT: Name: __bss_start{{$}} -CHECK-NEXT: } -CHECK-NEXT: Symbol { -CHECK-NEXT: Version: 2 -CHECK-NEXT: Name: foo@@VERSION1 -CHECK-NEXT: } -CHECK-NEXT: Symbol { -CHECK-NEXT: Version: 2 -CHECK-NEXT: Name: VERSION1@@VERSION1 -CHECK-NEXT: } -CHECK-NEXT: Symbol { -CHECK-NEXT: Version: 3 -CHECK-NEXT: Name: VERSION2@@VERSION2 -CHECK-NEXT: } -CHECK-NEXT: ] -CHECK-NEXT: } +LLVM-VERDEF: Version symbols { +LLVM-VERDEF-NEXT: Section Name: .gnu.version (20) +LLVM-VERDEF-NEXT: Address: 0x24C +LLVM-VERDEF-NEXT: Offset: 0x24C +LLVM-VERDEF-NEXT: Link: 1 +LLVM-VERDEF-NEXT: Symbols [ +LLVM-VERDEF-NEXT: Symbol { +LLVM-VERDEF-NEXT: Version: 0 +LLVM-VERDEF-NEXT: Name: {{$}} +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Symbol { +LLVM-VERDEF-NEXT: Version: 1 +LLVM-VERDEF-NEXT: Name: _end{{$}} +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Symbol { +LLVM-VERDEF-NEXT: Version: 1 +LLVM-VERDEF-NEXT: Name: _edata{{$}} +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Symbol { +LLVM-VERDEF-NEXT: Version: 3 +LLVM-VERDEF-NEXT: Name: goo@@VERSION2 +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Symbol { +LLVM-VERDEF-NEXT: Version: 1 +LLVM-VERDEF-NEXT: Name: __bss_start{{$}} +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Symbol { +LLVM-VERDEF-NEXT: Version: 2 +LLVM-VERDEF-NEXT: Name: foo@@VERSION1 +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Symbol { +LLVM-VERDEF-NEXT: Version: 2 +LLVM-VERDEF-NEXT: Name: VERSION1@@VERSION1 +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Symbol { +LLVM-VERDEF-NEXT: Version: 3 +LLVM-VERDEF-NEXT: Name: VERSION2@@VERSION2 +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: ] +LLVM-VERDEF-NEXT: } -CHECK: SHT_GNU_verdef { -CHECK-NEXT: Definition { -CHECK-NEXT: Version: 1 -CHECK-NEXT: Flags: Base (0x1) -CHECK-NEXT: Index: 1 -CHECK-NEXT: Hash: 430712 -CHECK-NEXT: Name: blah -CHECK-NEXT: } -CHECK-NEXT: Definition { -CHECK-NEXT: Version: 1 -CHECK-NEXT: Flags: 0x0 -CHECK-NEXT: Index: 2 -CHECK-NEXT: Hash: 175630257 -CHECK-NEXT: Name: VERSION1 -CHECK-NEXT: } -CHECK-NEXT: Definition { -CHECK-NEXT: Version: 1 -CHECK-NEXT: Flags: 0x0 -CHECK-NEXT: Index: 3 -CHECK-NEXT: Hash: 175630258 -CHECK-NEXT: Name: VERSION2 -CHECK-NEXT: Predecessor: VERSION1 -CHECK-NEXT: } -CHECK-NEXT: } +LLVM-VERDEF: SHT_GNU_verdef { +LLVM-VERDEF-NEXT: Definition { +LLVM-VERDEF-NEXT: Version: 1 +LLVM-VERDEF-NEXT: Flags: Base (0x1) +LLVM-VERDEF-NEXT: Index: 1 +LLVM-VERDEF-NEXT: Hash: 430712 +LLVM-VERDEF-NEXT: Name: blah +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Definition { +LLVM-VERDEF-NEXT: Version: 1 +LLVM-VERDEF-NEXT: Flags: 0x0 +LLVM-VERDEF-NEXT: Index: 2 +LLVM-VERDEF-NEXT: Hash: 175630257 +LLVM-VERDEF-NEXT: Name: VERSION1 +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Definition { +LLVM-VERDEF-NEXT: Version: 1 +LLVM-VERDEF-NEXT: Flags: 0x0 +LLVM-VERDEF-NEXT: Index: 3 +LLVM-VERDEF-NEXT: Hash: 175630258 +LLVM-VERDEF-NEXT: Name: VERSION2 +LLVM-VERDEF-NEXT: Predecessor: VERSION1 +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: } -RUN: llvm-readobj -V %p/Inputs/verneed.elf-x86-64 | FileCheck %s --check-prefix=VERNEED +GNU-VERDEF: 0x000000006ffffff0 VERSYM 0x24c +GNU-VERDEF: 0x000000006ffffffc VERDEF 0x25c +GNU-VERDEF: 0x000000006ffffffd VERDEFNUM 3 + +GNU-VERDEF: Dumper for .gnu.version is not implemented +GNU-VERDEF: Dumper for .gnu.version_d is not implemented + +RUN: llvm-readobj -V %p/Inputs/verneed.elf-x86-64 | FileCheck %s --check-prefix=LLVM-VERNEED +RUN: llvm-readelf -V %p/Inputs/verneed.elf-x86-64 | FileCheck %s --check-prefix=GNU-VERNEED + +LLVM-VERNEED: SHT_GNU_verneed { +LLVM-VERNEED-NEXT: Dependency { +LLVM-VERNEED-NEXT: Version: 1 +LLVM-VERNEED-NEXT: Count: 2 +LLVM-VERNEED-NEXT: FileName: verneed1.so.0 +LLVM-VERNEED-NEXT: Entry { +LLVM-VERNEED-NEXT: Hash: 1938 +LLVM-VERNEED-NEXT: Flags: 0x0 +LLVM-VERNEED-NEXT: Index: 3 +LLVM-VERNEED-NEXT: Name: v2 +LLVM-VERNEED-NEXT: } +LLVM-VERNEED-NEXT: Entry { +LLVM-VERNEED-NEXT: Hash: 1939 +LLVM-VERNEED-NEXT: Flags: 0x0 +LLVM-VERNEED-NEXT: Index: 2 +LLVM-VERNEED-NEXT: Name: v3 +LLVM-VERNEED-NEXT: } +LLVM-VERNEED-NEXT: } +LLVM-VERNEED-NEXT: Dependency { +LLVM-VERNEED-NEXT: Version: 1 +LLVM-VERNEED-NEXT: Count: 1 +LLVM-VERNEED-NEXT: FileName: verneed2.so.0 +LLVM-VERNEED-NEXT: Entry { +LLVM-VERNEED-NEXT: Hash: 1937 +LLVM-VERNEED-NEXT: Flags: 0x0 +LLVM-VERNEED-NEXT: Index: 4 +LLVM-VERNEED-NEXT: Name: v1 +LLVM-VERNEED-NEXT: } +LLVM-VERNEED-NEXT: } +LLVM-VERNEED-NEXT: } + +GNU-VERNEED: Dumper for .gnu.version is not implemented +GNU-VERNEED: Dumper for .gnu.version_r is not implemented -VERNEED: SHT_GNU_verneed { -VERNEED-NEXT: Dependency { -VERNEED-NEXT: Version: 1 -VERNEED-NEXT: Count: 2 -VERNEED-NEXT: FileName: verneed1.so.0 -VERNEED-NEXT: Entry { -VERNEED-NEXT: Hash: 1938 -VERNEED-NEXT: Flags: 0x0 -VERNEED-NEXT: Index: 3 -VERNEED-NEXT: Name: v2 -VERNEED-NEXT: } -VERNEED-NEXT: Entry { -VERNEED-NEXT: Hash: 1939 -VERNEED-NEXT: Flags: 0x0 -VERNEED-NEXT: Index: 2 -VERNEED-NEXT: Name: v3 -VERNEED-NEXT: } -VERNEED-NEXT: } -VERNEED-NEXT: Dependency { -VERNEED-NEXT: Version: 1 -VERNEED-NEXT: Count: 1 -VERNEED-NEXT: FileName: verneed2.so.0 -VERNEED-NEXT: Entry { -VERNEED-NEXT: Hash: 1937 -VERNEED-NEXT: Flags: 0x0 -VERNEED-NEXT: Index: 4 -VERNEED-NEXT: Name: v1 -VERNEED-NEXT: } -VERNEED-NEXT: } -VERNEED-NEXT: } diff --git a/test/tools/yaml2obj/verdef-section.yaml b/test/tools/yaml2obj/verdef-section.yaml index f81bcf196f6f..deac6e736c0b 100644 --- a/test/tools/yaml2obj/verdef-section.yaml +++ b/test/tools/yaml2obj/verdef-section.yaml @@ -1,5 +1,5 @@ # RUN: yaml2obj %s -o %t -# RUN: llvm-readelf -V %t | FileCheck %s +# RUN: llvm-readobj -V %t | FileCheck %s # Check we are able to handle the SHT_GNU_verdef sections. diff --git a/test/tools/yaml2obj/verneed-section.yaml b/test/tools/yaml2obj/verneed-section.yaml index 436e54ba0893..2fc58ad64f0f 100644 --- a/test/tools/yaml2obj/verneed-section.yaml +++ b/test/tools/yaml2obj/verneed-section.yaml @@ -1,5 +1,5 @@ # RUN: yaml2obj %s -o %t -# RUN: llvm-readelf -V %t | FileCheck %s +# RUN: llvm-readobj -V %t | FileCheck %s # Check we are able to handle the SHT_GNU_verneed sections. diff --git a/test/tools/yaml2obj/versym-section.yaml b/test/tools/yaml2obj/versym-section.yaml index 31dfecfa297c..3c08ddd63d2e 100644 --- a/test/tools/yaml2obj/versym-section.yaml +++ b/test/tools/yaml2obj/versym-section.yaml @@ -1,5 +1,5 @@ # RUN: yaml2obj %s -o %t -# RUN: llvm-readelf -V %t | FileCheck %s +# RUN: llvm-readobj -V %t | FileCheck %s ## Check we are able to produce a valid SHT_GNU_versym ## section from its description. diff --git a/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/tools/llvm-objcopy/ELF/ELFObjcopy.cpp index fc1ac38ec872..1ae802ff14b3 100644 --- a/tools/llvm-objcopy/ELF/ELFObjcopy.cpp +++ b/tools/llvm-objcopy/ELF/ELFObjcopy.cpp @@ -298,110 +298,94 @@ static bool isUnneededSymbol(const Symbol &Sym) { Sym.Type != STT_FILE && Sym.Type != STT_SECTION; } -// This function handles the high level operations of GNU objcopy including -// handling command line options. It's important to outline certain properties -// we expect to hold of the command line operations. Any operation that "keeps" -// should keep regardless of a remove. Additionally any removal should respect -// any previous removals. Lastly whether or not something is removed shouldn't -// depend a) on the order the options occur in or b) on some opaque priority -// system. The only priority is that keeps/copies overrule removes. -static Error handleArgs(const CopyConfig &Config, Object &Obj, - const Reader &Reader, ElfType OutputElfType) { - - if (!Config.SplitDWO.empty()) - if (Error E = - splitDWOToFile(Config, Reader, Config.SplitDWO, OutputElfType)) - return E; - - if (Config.OutputArch) { - Obj.Machine = Config.OutputArch.getValue().EMachine; - Obj.OSABI = Config.OutputArch.getValue().OSABI; - } - +static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) { // TODO: update or remove symbols only if there is an option that affects // them. - if (Obj.SymbolTable) { - Obj.SymbolTable->updateSymbols([&](Symbol &Sym) { - // Common and undefined symbols don't make sense as local symbols, and can - // even cause crashes if we localize those, so skip them. - if (!Sym.isCommon() && Sym.getShndx() != SHN_UNDEF && - ((Config.LocalizeHidden && - (Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) || - is_contained(Config.SymbolsToLocalize, Sym.Name))) - Sym.Binding = STB_LOCAL; - - // Note: these two globalize flags have very similar names but different - // meanings: - // - // --globalize-symbol: promote a symbol to global - // --keep-global-symbol: all symbols except for these should be made local - // - // If --globalize-symbol is specified for a given symbol, it will be - // global in the output file even if it is not included via - // --keep-global-symbol. Because of that, make sure to check - // --globalize-symbol second. - if (!Config.SymbolsToKeepGlobal.empty() && - !is_contained(Config.SymbolsToKeepGlobal, Sym.Name) && - Sym.getShndx() != SHN_UNDEF) - Sym.Binding = STB_LOCAL; - - if (is_contained(Config.SymbolsToGlobalize, Sym.Name) && - Sym.getShndx() != SHN_UNDEF) - Sym.Binding = STB_GLOBAL; - - if (is_contained(Config.SymbolsToWeaken, Sym.Name) && - Sym.Binding == STB_GLOBAL) - Sym.Binding = STB_WEAK; - - if (Config.Weaken && Sym.Binding == STB_GLOBAL && - Sym.getShndx() != SHN_UNDEF) - Sym.Binding = STB_WEAK; - - const auto I = Config.SymbolsToRename.find(Sym.Name); - if (I != Config.SymbolsToRename.end()) - Sym.Name = I->getValue(); - - if (!Config.SymbolsPrefix.empty() && Sym.Type != STT_SECTION) - Sym.Name = (Config.SymbolsPrefix + Sym.Name).str(); - }); - - // The purpose of this loop is to mark symbols referenced by sections - // (like GroupSection or RelocationSection). This way, we know which - // symbols are still 'needed' and which are not. - if (Config.StripUnneeded || !Config.UnneededSymbolsToRemove.empty()) { - for (auto &Section : Obj.sections()) - Section.markSymbols(); - } + if (!Obj.SymbolTable) + return Error::success(); + + Obj.SymbolTable->updateSymbols([&](Symbol &Sym) { + // Common and undefined symbols don't make sense as local symbols, and can + // even cause crashes if we localize those, so skip them. + if (!Sym.isCommon() && Sym.getShndx() != SHN_UNDEF && + ((Config.LocalizeHidden && + (Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) || + is_contained(Config.SymbolsToLocalize, Sym.Name))) + Sym.Binding = STB_LOCAL; + + // Note: these two globalize flags have very similar names but different + // meanings: + // + // --globalize-symbol: promote a symbol to global + // --keep-global-symbol: all symbols except for these should be made local + // + // If --globalize-symbol is specified for a given symbol, it will be + // global in the output file even if it is not included via + // --keep-global-symbol. Because of that, make sure to check + // --globalize-symbol second. + if (!Config.SymbolsToKeepGlobal.empty() && + !is_contained(Config.SymbolsToKeepGlobal, Sym.Name) && + Sym.getShndx() != SHN_UNDEF) + Sym.Binding = STB_LOCAL; + + if (is_contained(Config.SymbolsToGlobalize, Sym.Name) && + Sym.getShndx() != SHN_UNDEF) + Sym.Binding = STB_GLOBAL; + + if (is_contained(Config.SymbolsToWeaken, Sym.Name) && + Sym.Binding == STB_GLOBAL) + Sym.Binding = STB_WEAK; + + if (Config.Weaken && Sym.Binding == STB_GLOBAL && + Sym.getShndx() != SHN_UNDEF) + Sym.Binding = STB_WEAK; + + const auto I = Config.SymbolsToRename.find(Sym.Name); + if (I != Config.SymbolsToRename.end()) + Sym.Name = I->getValue(); + + if (!Config.SymbolsPrefix.empty() && Sym.Type != STT_SECTION) + Sym.Name = (Config.SymbolsPrefix + Sym.Name).str(); + }); + + // The purpose of this loop is to mark symbols referenced by sections + // (like GroupSection or RelocationSection). This way, we know which + // symbols are still 'needed' and which are not. + if (Config.StripUnneeded || !Config.UnneededSymbolsToRemove.empty()) { + for (auto &Section : Obj.sections()) + Section.markSymbols(); + } - auto RemoveSymbolsPred = [&](const Symbol &Sym) { - if (is_contained(Config.SymbolsToKeep, Sym.Name) || - (Config.KeepFileSymbols && Sym.Type == STT_FILE)) - return false; + auto RemoveSymbolsPred = [&](const Symbol &Sym) { + if (is_contained(Config.SymbolsToKeep, Sym.Name) || + (Config.KeepFileSymbols && Sym.Type == STT_FILE)) + return false; - if ((Config.DiscardMode == DiscardType::All || - (Config.DiscardMode == DiscardType::Locals && - StringRef(Sym.Name).startswith(".L"))) && - Sym.Binding == STB_LOCAL && Sym.getShndx() != SHN_UNDEF && - Sym.Type != STT_FILE && Sym.Type != STT_SECTION) - return true; + if ((Config.DiscardMode == DiscardType::All || + (Config.DiscardMode == DiscardType::Locals && + StringRef(Sym.Name).startswith(".L"))) && + Sym.Binding == STB_LOCAL && Sym.getShndx() != SHN_UNDEF && + Sym.Type != STT_FILE && Sym.Type != STT_SECTION) + return true; - if (Config.StripAll || Config.StripAllGNU) - return true; + if (Config.StripAll || Config.StripAllGNU) + return true; - if (is_contained(Config.SymbolsToRemove, Sym.Name)) - return true; + if (is_contained(Config.SymbolsToRemove, Sym.Name)) + return true; - if ((Config.StripUnneeded || - is_contained(Config.UnneededSymbolsToRemove, Sym.Name)) && - isUnneededSymbol(Sym)) - return true; + if ((Config.StripUnneeded || + is_contained(Config.UnneededSymbolsToRemove, Sym.Name)) && + isUnneededSymbol(Sym)) + return true; - return false; - }; - if (Error E = Obj.removeSymbols(RemoveSymbolsPred)) - return E; - } + return false; + }; + + return Obj.removeSymbols(RemoveSymbolsPred); +} +static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) { SectionPred RemovePred = [](const SectionBase &) { return false; }; // Removes: @@ -535,7 +519,33 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj, return &Obj.addSection(*CS); }); - if (Error E = Obj.removeSections(RemovePred)) + return Obj.removeSections(RemovePred); +} + +// This function handles the high level operations of GNU objcopy including +// handling command line options. It's important to outline certain properties +// we expect to hold of the command line operations. Any operation that "keeps" +// should keep regardless of a remove. Additionally any removal should respect +// any previous removals. Lastly whether or not something is removed shouldn't +// depend a) on the order the options occur in or b) on some opaque priority +// system. The only priority is that keeps/copies overrule removes. +static Error handleArgs(const CopyConfig &Config, Object &Obj, + const Reader &Reader, ElfType OutputElfType) { + + if (!Config.SplitDWO.empty()) + if (Error E = + splitDWOToFile(Config, Reader, Config.SplitDWO, OutputElfType)) + return E; + + if (Config.OutputArch) { + Obj.Machine = Config.OutputArch.getValue().EMachine; + Obj.OSABI = Config.OutputArch.getValue().OSABI; + } + + if (Error E = updateAndRemoveSymbols(Config, Obj)) + return E; + + if (Error E = replaceAndRemoveSections(Config, Obj)) return E; if (!Config.SectionsToRename.empty()) { diff --git a/tools/llvm-objcopy/ELF/Object.cpp b/tools/llvm-objcopy/ELF/Object.cpp index 4639d9053943..7cceb70ca63b 100644 --- a/tools/llvm-objcopy/ELF/Object.cpp +++ b/tools/llvm-objcopy/ELF/Object.cpp @@ -906,7 +906,9 @@ template void ELFBuilder::setParentSegment(Segment &Child) { template void ELFBuilder::readProgramHeaders() { uint32_t Index = 0; for (const auto &Phdr : unwrapOrError(ElfFile.program_headers())) { - Segment &Seg = Obj.addSegment(); + ArrayRef Data{ElfFile.base() + Phdr.p_offset, + (size_t)Phdr.p_filesz}; + Segment &Seg = Obj.addSegment(Data); Seg.Type = Phdr.p_type; Seg.Flags = Phdr.p_flags; Seg.OriginalOffset = Phdr.p_offset; @@ -1350,7 +1352,31 @@ template void ELFWriter::writeShdrs() { template void ELFWriter::writeSectionData() { for (auto &Sec : Obj.sections()) - Sec.accept(*SecWriter); + // Segments are responsible for writing their contents, so only write the + // section data if the section is not in a segment. Note that this renders + // sections in segments effectively immutable. + if (Sec.ParentSegment == nullptr) + Sec.accept(*SecWriter); +} + +template void ELFWriter::writeSegmentData() { + for (Segment &Seg : Obj.segments()) { + uint8_t *B = Buf.getBufferStart() + Seg.Offset; + assert(Seg.FileSize == Seg.getContents().size() && + "Segment size must match contents size"); + std::memcpy(B, Seg.getContents().data(), Seg.FileSize); + } + + // Iterate over removed sections and overwrite their old data with zeroes. + for (auto &Sec : Obj.removedSections()) { + Segment *Parent = Sec.ParentSegment; + if (Parent == nullptr || Sec.Type == SHT_NOBITS || Sec.Size == 0) + continue; + uint64_t Offset = + Sec.OriginalOffset - Parent->OriginalOffset + Parent->Offset; + uint8_t *B = Buf.getBufferStart(); + std::memset(B + Offset, 0, Sec.Size); + } } Error Object::removeSections( @@ -1396,7 +1422,10 @@ Error Object::removeSections( return E; } - // Now finally get rid of them all togethor. + // Transfer removed sections into the Object RemovedSections container for use + // later. + std::move(Iter, Sections.end(), std::back_inserter(RemovedSections)); + // Now finally get rid of them all together. Sections.erase(Iter, std::end(Sections)); return Error::success(); } @@ -1542,6 +1571,9 @@ template size_t ELFWriter::totalSize() const { } template Error ELFWriter::write() { + // Segment data must be written first, so that the ELF header and program + // header tables can overwrite it, if covered by a segment. + writeSegmentData(); writeEhdr(); writePhdrs(); writeSectionData(); diff --git a/tools/llvm-objcopy/ELF/Object.h b/tools/llvm-objcopy/ELF/Object.h index e892d066a6cd..26d6a122c468 100644 --- a/tools/llvm-objcopy/ELF/Object.h +++ b/tools/llvm-objcopy/ELF/Object.h @@ -215,6 +215,7 @@ template class ELFWriter : public Writer { void writePhdrs(); void writeShdrs(); void writeSectionData(); + void writeSegmentData(); void assignOffsets(); @@ -312,6 +313,10 @@ class Segment { uint32_t Index; uint64_t OriginalOffset; Segment *ParentSegment = nullptr; + ArrayRef Contents; + + explicit Segment(ArrayRef Data) : Contents(Data) {} + Segment() {} const SectionBase *firstSection() const { if (!Sections.empty()) @@ -321,6 +326,8 @@ class Segment { void removeSection(const SectionBase *Sec) { Sections.erase(Sec); } void addSection(const SectionBase *Sec) { Sections.insert(Sec); } + + ArrayRef getContents() const { return Contents; } }; class Section : public SectionBase { @@ -773,6 +780,7 @@ class Object { std::vector Sections; std::vector Segments; + std::vector RemovedSections; public: template @@ -815,6 +823,8 @@ class Object { find_if(Sections, [&](const SecPtr &Sec) { return Sec->Name == Name; }); return SecIt == Sections.end() ? nullptr : SecIt->get(); } + SectionTableRef removedSections() { return SectionTableRef(RemovedSections); } + Range segments() { return make_pointee_range(Segments); } ConstRange segments() const { return make_pointee_range(Segments); } @@ -827,8 +837,8 @@ class Object { Ptr->Index = Sections.size(); return *Ptr; } - Segment &addSegment() { - Segments.emplace_back(llvm::make_unique()); + Segment &addSegment(ArrayRef Data) { + Segments.emplace_back(llvm::make_unique(Data)); return *Segments.back(); } }; diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp index 8c1c6fef89ec..091456329f91 100644 --- a/tools/llvm-readobj/ELFDumper.cpp +++ b/tools/llvm-readobj/ELFDumper.cpp @@ -342,6 +342,12 @@ template class DumpStyle { virtual void printProgramHeaders(const ELFFile *Obj, bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) = 0; + virtual void printVersionSymbolSection(const ELFFile *Obj, + const Elf_Shdr *Sec) = 0; + virtual void printVersionDefinitionSection(const ELFFile *Obj, + const Elf_Shdr *Sec) = 0; + virtual void printVersionDependencySection(const ELFFile *Obj, + const Elf_Shdr *Sec) = 0; virtual void printHashHistogram(const ELFFile *Obj) = 0; virtual void printCGProfile(const ELFFile *Obj) = 0; virtual void printAddrsig(const ELFFile *Obj) = 0; @@ -376,6 +382,12 @@ template class GNUStyle : public DumpStyle { size_t Offset) override; void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) override; + void printVersionSymbolSection(const ELFFile *Obj, + const Elf_Shdr *Sec) override; + void printVersionDefinitionSection(const ELFFile *Obj, + const Elf_Shdr *Sec) override; + void printVersionDependencySection(const ELFFile *Obj, + const Elf_Shdr *Sec) override; void printHashHistogram(const ELFFile *Obj) override; void printCGProfile(const ELFFile *Obj) override; void printAddrsig(const ELFFile *Obj) override; @@ -470,6 +482,12 @@ template class LLVMStyle : public DumpStyle { void printDynamicRelocations(const ELFO *Obj) override; void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) override; + void printVersionSymbolSection(const ELFFile *Obj, + const Elf_Shdr *Sec) override; + void printVersionDefinitionSection(const ELFFile *Obj, + const Elf_Shdr *Sec) override; + void printVersionDependencySection(const ELFFile *Obj, + const Elf_Shdr *Sec) override; void printHashHistogram(const ELFFile *Obj) override; void printCGProfile(const ELFFile *Obj) override; void printAddrsig(const ELFFile *Obj) override; @@ -607,143 +625,6 @@ template void ELFDumper::LoadVersionMap() const { LoadVersionNeeds(dot_gnu_version_r_sec); } -template -static void printVersionSymbolSection(ELFDumper *Dumper, const ELFO *Obj, - const typename ELFO::Elf_Shdr *Sec, - ScopedPrinter &W) { - DictScope SS(W, "Version symbols"); - if (!Sec) - return; - StringRef Name = unwrapOrError(Obj->getSectionName(Sec)); - W.printNumber("Section Name", Name, Sec->sh_name); - W.printHex("Address", Sec->sh_addr); - W.printHex("Offset", Sec->sh_offset); - W.printNumber("Link", Sec->sh_link); - - const uint8_t *P = (const uint8_t *)Obj->base() + Sec->sh_offset; - StringRef StrTable = Dumper->getDynamicStringTable(); - - // Same number of entries in the dynamic symbol table (DT_SYMTAB). - ListScope Syms(W, "Symbols"); - for (const typename ELFO::Elf_Sym &Sym : Dumper->dynamic_symbols()) { - DictScope S(W, "Symbol"); - std::string FullSymbolName = - Dumper->getFullSymbolName(&Sym, StrTable, true /* IsDynamic */); - W.printNumber("Version", *P); - W.printString("Name", FullSymbolName); - P += sizeof(typename ELFO::Elf_Half); - } -} - -static const EnumEntry SymVersionFlags[] = { - {"Base", "BASE", VER_FLG_BASE}, - {"Weak", "WEAK", VER_FLG_WEAK}, - {"Info", "INFO", VER_FLG_INFO}}; - -template -static void printVersionDefinitionSection(ELFDumper *Dumper, - const ELFO *Obj, - const typename ELFO::Elf_Shdr *Sec, - ScopedPrinter &W) { - using VerDef = typename ELFO::Elf_Verdef; - using VerdAux = typename ELFO::Elf_Verdaux; - - DictScope SD(W, "SHT_GNU_verdef"); - if (!Sec) - return; - - const uint8_t *SecStartAddress = - (const uint8_t *)Obj->base() + Sec->sh_offset; - const uint8_t *SecEndAddress = SecStartAddress + Sec->sh_size; - const uint8_t *P = SecStartAddress; - const typename ELFO::Elf_Shdr *StrTab = - unwrapOrError(Obj->getSection(Sec->sh_link)); - - unsigned VerDefsNum = Sec->sh_info; - while (VerDefsNum--) { - if (P + sizeof(VerDef) > SecEndAddress) - report_fatal_error("invalid offset in the section"); - - auto *VD = reinterpret_cast(P); - DictScope Def(W, "Definition"); - W.printNumber("Version", VD->vd_version); - W.printEnum("Flags", VD->vd_flags, makeArrayRef(SymVersionFlags)); - W.printNumber("Index", VD->vd_ndx); - W.printNumber("Hash", VD->vd_hash); - W.printString("Name", - StringRef((const char *)(Obj->base() + StrTab->sh_offset + - VD->getAux()->vda_name))); - if (!VD->vd_cnt) - report_fatal_error("at least one definition string must exist"); - if (VD->vd_cnt > 2) - report_fatal_error("more than one predecessor is not expected"); - - if (VD->vd_cnt == 2) { - const uint8_t *PAux = P + VD->vd_aux + VD->getAux()->vda_next; - const VerdAux *Aux = reinterpret_cast(PAux); - W.printString("Predecessor", - StringRef((const char *)(Obj->base() + StrTab->sh_offset + - Aux->vda_name))); - } - - P += VD->vd_next; - } -} - -template -static void printVersionDependencySection(ELFDumper *Dumper, - const ELFO *Obj, - const typename ELFO::Elf_Shdr *Sec, - ScopedPrinter &W) { - using VerNeed = typename ELFO::Elf_Verneed; - using VernAux = typename ELFO::Elf_Vernaux; - - DictScope SD(W, "SHT_GNU_verneed"); - if (!Sec) - return; - - const uint8_t *SecData = (const uint8_t *)Obj->base() + Sec->sh_offset; - const typename ELFO::Elf_Shdr *StrTab = - unwrapOrError(Obj->getSection(Sec->sh_link)); - - const uint8_t *P = SecData; - unsigned VerNeedNum = Sec->sh_info; - for (unsigned I = 0; I < VerNeedNum; ++I) { - const VerNeed *Need = reinterpret_cast(P); - DictScope Entry(W, "Dependency"); - W.printNumber("Version", Need->vn_version); - W.printNumber("Count", Need->vn_cnt); - W.printString("FileName", - StringRef((const char *)(Obj->base() + StrTab->sh_offset + - Need->vn_file))); - - const uint8_t *PAux = P + Need->vn_aux; - for (unsigned J = 0; J < Need->vn_cnt; ++J) { - const VernAux *Aux = reinterpret_cast(PAux); - DictScope Entry(W, "Entry"); - W.printNumber("Hash", Aux->vna_hash); - W.printEnum("Flags", Aux->vna_flags, makeArrayRef(SymVersionFlags)); - W.printNumber("Index", Aux->vna_other); - W.printString("Name", - StringRef((const char *)(Obj->base() + StrTab->sh_offset + - Aux->vna_name))); - PAux += Aux->vna_next; - } - P += Need->vn_next; - } -} - -template void ELFDumper::printVersionInfo() { - // Dump version symbol section. - printVersionSymbolSection(this, ObjF->getELFFile(), dot_gnu_version_sec, W); - - // Dump version definition section. - printVersionDefinitionSection(this, ObjF->getELFFile(), dot_gnu_version_d_sec, W); - - // Dump version dependency section. - printVersionDependencySection(this, ObjF->getELFFile(), dot_gnu_version_r_sec, W); -} - template StringRef ELFDumper::getSymbolVersion(StringRef StrTab, const Elf_Sym *symb, @@ -925,6 +806,11 @@ static const EnumEntry ElfOSABI[] = { {"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE} }; +static const EnumEntry SymVersionFlags[] = { + {"Base", "BASE", VER_FLG_BASE}, + {"Weak", "WEAK", VER_FLG_WEAK}, + {"Info", "INFO", VER_FLG_INFO}}; + static const EnumEntry AMDGPUElfOSABI[] = { {"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA}, {"AMDGPU_PAL", "AMDGPU - PAL", ELF::ELFOSABI_AMDGPU_PAL}, @@ -1616,6 +1502,20 @@ void ELFDumper::printProgramHeaders( PrintSectionMapping); } +template void ELFDumper::printVersionInfo() { + // Dump version symbol section. + ELFDumperStyle->printVersionSymbolSection(ObjF->getELFFile(), + dot_gnu_version_sec); + + // Dump version definition section. + ELFDumperStyle->printVersionDefinitionSection(ObjF->getELFFile(), + dot_gnu_version_d_sec); + + // Dump version dependency section. + ELFDumperStyle->printVersionDependencySection(ObjF->getELFFile(), + dot_gnu_version_r_sec); +} + template void ELFDumper::printDynamicRelocations() { ELFDumperStyle->printDynamicRelocations(ObjF->getELFFile()); } @@ -3460,6 +3360,36 @@ void GNUStyle::printDynamicRelocations(const ELFO *Obj) { } } +template +void GNUStyle::printVersionSymbolSection(const ELFFile *Obj, + const Elf_Shdr *Sec) { + if (!Sec) + return; + + StringRef SecName = unwrapOrError(Obj->getSectionName(Sec)); + OS << "Dumper for " << SecName << " is not implemented\n"; +} + +template +void GNUStyle::printVersionDefinitionSection(const ELFFile *Obj, + const Elf_Shdr *Sec) { + if (!Sec) + return; + + StringRef SecName = unwrapOrError(Obj->getSectionName(Sec)); + OS << "Dumper for " << SecName << " is not implemented\n"; +} + +template +void GNUStyle::printVersionDependencySection(const ELFFile *Obj, + const Elf_Shdr *Sec) { + if (!Sec) + return; + + StringRef SecName = unwrapOrError(Obj->getSectionName(Sec)); + OS << "Dumper for " << SecName << " is not implemented\n"; +} + // Hash histogram shows statistics of how efficient the hash was for the // dynamic symbol table. The table shows number of hash buckets for different // lengths of chains as absolute number and percentage of the total buckets. @@ -4538,6 +4468,122 @@ void LLVMStyle::printProgramHeaders(const ELFO *Obj) { } } +template +void LLVMStyle::printVersionSymbolSection(const ELFFile *Obj, + const Elf_Shdr *Sec) { + DictScope SS(W, "Version symbols"); + if (!Sec) + return; + + StringRef SecName = unwrapOrError(Obj->getSectionName(Sec)); + W.printNumber("Section Name", SecName, Sec->sh_name); + W.printHex("Address", Sec->sh_addr); + W.printHex("Offset", Sec->sh_offset); + W.printNumber("Link", Sec->sh_link); + + const uint8_t *VersymBuf = (const uint8_t *)Obj->base() + Sec->sh_offset; + const ELFDumper *Dumper = this->dumper(); + StringRef StrTable = Dumper->getDynamicStringTable(); + + // Same number of entries in the dynamic symbol table (DT_SYMTAB). + ListScope Syms(W, "Symbols"); + for (const Elf_Sym &Sym : Dumper->dynamic_symbols()) { + DictScope S(W, "Symbol"); + const Elf_Versym *Versym = reinterpret_cast(VersymBuf); + std::string FullSymbolName = + Dumper->getFullSymbolName(&Sym, StrTable, true /* IsDynamic */); + W.printNumber("Version", Versym->vs_index & VERSYM_VERSION); + W.printString("Name", FullSymbolName); + VersymBuf += sizeof(Elf_Versym); + } +} + +template +void LLVMStyle::printVersionDefinitionSection(const ELFFile *Obj, + const Elf_Shdr *Sec) { + DictScope SD(W, "SHT_GNU_verdef"); + if (!Sec) + return; + + const uint8_t *SecStartAddress = + (const uint8_t *)Obj->base() + Sec->sh_offset; + const uint8_t *SecEndAddress = SecStartAddress + Sec->sh_size; + const uint8_t *VerdefBuf = SecStartAddress; + const Elf_Shdr *StrTab = unwrapOrError(Obj->getSection(Sec->sh_link)); + + unsigned VerDefsNum = Sec->sh_info; + while (VerDefsNum--) { + if (VerdefBuf + sizeof(Elf_Verdef) > SecEndAddress) + // FIXME: report_fatal_error is not a good way to report error. We should + // emit a parsing error here and below. + report_fatal_error("invalid offset in the section"); + + const Elf_Verdef *Verdef = reinterpret_cast(VerdefBuf); + DictScope Def(W, "Definition"); + W.printNumber("Version", Verdef->vd_version); + W.printEnum("Flags", Verdef->vd_flags, makeArrayRef(SymVersionFlags)); + W.printNumber("Index", Verdef->vd_ndx); + W.printNumber("Hash", Verdef->vd_hash); + W.printString("Name", + StringRef((const char *)(Obj->base() + StrTab->sh_offset + + Verdef->getAux()->vda_name))); + if (!Verdef->vd_cnt) + report_fatal_error("at least one definition string must exist"); + if (Verdef->vd_cnt > 2) + report_fatal_error("more than one predecessor is not expected"); + + if (Verdef->vd_cnt == 2) { + const uint8_t *VerdauxBuf = + VerdefBuf + Verdef->vd_aux + Verdef->getAux()->vda_next; + const Elf_Verdaux *Verdaux = + reinterpret_cast(VerdauxBuf); + W.printString("Predecessor", + StringRef((const char *)(Obj->base() + StrTab->sh_offset + + Verdaux->vda_name))); + } + VerdefBuf += Verdef->vd_next; + } +} + +template +void LLVMStyle::printVersionDependencySection(const ELFFile *Obj, + const Elf_Shdr *Sec) { + DictScope SD(W, "SHT_GNU_verneed"); + if (!Sec) + return; + + const uint8_t *SecData = (const uint8_t *)Obj->base() + Sec->sh_offset; + const Elf_Shdr *StrTab = unwrapOrError(Obj->getSection(Sec->sh_link)); + + const uint8_t *VerneedBuf = SecData; + unsigned VerneedNum = Sec->sh_info; + for (unsigned I = 0; I < VerneedNum; ++I) { + const Elf_Verneed *Verneed = + reinterpret_cast(VerneedBuf); + DictScope Entry(W, "Dependency"); + W.printNumber("Version", Verneed->vn_version); + W.printNumber("Count", Verneed->vn_cnt); + W.printString("FileName", + StringRef((const char *)(Obj->base() + StrTab->sh_offset + + Verneed->vn_file))); + + const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux; + for (unsigned J = 0; J < Verneed->vn_cnt; ++J) { + const Elf_Vernaux *Vernaux = + reinterpret_cast(VernauxBuf); + DictScope Entry(W, "Entry"); + W.printNumber("Hash", Vernaux->vna_hash); + W.printEnum("Flags", Vernaux->vna_flags, makeArrayRef(SymVersionFlags)); + W.printNumber("Index", Vernaux->vna_other); + W.printString("Name", + StringRef((const char *)(Obj->base() + StrTab->sh_offset + + Vernaux->vna_name))); + VernauxBuf += Vernaux->vna_next; + } + VerneedBuf += Verneed->vn_next; + } +} + template void LLVMStyle::printHashHistogram(const ELFFile *Obj) { W.startLine() << "Hash Histogram not implemented!\n"; diff --git a/utils/gn/README.rst b/utils/gn/README.rst index 3c40846689c2..7ffa144aafb3 100644 --- a/utils/gn/README.rst +++ b/utils/gn/README.rst @@ -22,16 +22,10 @@ build. creates ninja files, but it can create some IDE projects (MSVC, Xcode, ...) which then shell out to ninja for the actual build. -Its main features are that GN is very fast (it currently produces ninja files -for LLVM's build in 35ms on the author's laptop, compared to 66s for CMake) -- -a 2000x difference), and since it's so fast it doesn't aggressively cache, -making it possible to switch e.g. between release and debug builds in one build -directory. - The main motivation behind the GN build is that some people find it more convenient for day-to-day hacking on LLVM than CMake. Distribution, building -just parts of LLVM, and embedding the LLVM GN build from other builds are a -non-goal for the GN build. +just parts of LLVM, and embedding the LLVM GN build from other builds are +non-goals for the GN build. This is a `good overview of GN `_. @@ -42,39 +36,42 @@ Quick start GN only works in the monorepo layout. -#. Obtain a gn binary. If gn is not already on your PATH, run - `llvm/utils/gn/get.py` to download a prebuilt gn binary if you're on a 64-bit - X86 system running Linux, macOS, or Windows, or `build gn yourself +#. ``git clone https://github.com/llvm/llvm-project.git; cd llvm-project`` if + you don't have a monorepo checkout yet. + +#. ``llvm/utils/gn/get.py`` to download a prebuilt gn binary if you're on a + 64-bit X86 system running Linux, macOS, or Windows. `Build gn yourself `_ if you're on a different platform or don't want to trust prebuilt binaries. -#. In the root of the monorepo, run `llvm/utils/gn/gn.py gen out/gn`. - `out/gn` is the build directory, it can have any name, and you can have as - many as you want, each with different build settings. (The `gn.py` script - adds `--dotfile=llvm/utils/gn/.gn --root=.` and just runs regular `gn`; +#. ``llvm/utils/gn/gn.py gen out/gn`` to run GN and create build files. + ``out/gn`` is the build directory, it can have any name, and you can have as + many as you want, each with different build settings. (The ``gn.py`` script + adds ``--dotfile=llvm/utils/gn/.gn --root=.`` and just runs regular ``gn``; you can manually pass these parameters and not use the wrapper if you prefer.) -#. Run e.g. `ninja -C out/gn check-lld` to build all prerequisites for and - run the LLD tests. +#. ``ninja -C out/gn check-lld`` to build all prerequisites for and run the LLD + tests. By default, you get a release build with assertions enabled that targets -the host arch. You can set various build options by editing `out/gn/args.gn`, -for example putting `is_debug = true` in there gives you a debug build. Run -`llvm/utils/gn/gn.py args --list out/gn` to see a list of all possible -options. After touching `out/gn/args.gn`, just run ninja, it will re-invoke gn +the host arch. You can set build options by editing ``out/gn/args.gn``, for +example putting ``is_debug = true`` in there gives you a debug build. Run +``llvm/utils/gn/gn.py args --list out/gn`` to see a list of all possible +options. After touching ``out/gn/args.gn`` just run ninja: it will re-invoke gn before starting the build. -GN has extensive built-in help; try e.g. `gn help gen` to see the help -for the `gen` command. The full GN reference is also `available online -`_. +GN has extensive built-in help; try e.g. ``llvm/utils/gn/gn.py help gen`` to see +the help for the ``gen`` command. The full GN reference is also `available +online `_. -GN has an autoformatter: `git ls-files '*.gn' '*.gni' | xargs -n 1 gn format` +GN has an autoformatter: +``git ls-files '*.gn' '*.gni' | xargs llvm/utils/gn/gn.py format`` after making GN build changes is your friend. -To not put `BUILD.gn` into the main tree, they are all below -`utils/gn/secondary`. For example, the build file for `llvm/lib/Support` is in -`utils/gn/secondary/llvm/lib/Support`. +To not put ``BUILD.gn`` files into the main tree, they are all below +``utils/gn/secondary``. For example, the build file for ``llvm/lib/Support`` +is in ``utils/gn/secondary/llvm/lib/Support``. .. _Syncing GN files from CMake files: @@ -83,15 +80,15 @@ Syncing GN files from CMake files Sometimes after pulling in the latest changes, the GN build doesn't work. Most of the time this is due to someone adding a file to CMakeLists.txt file. -Run `llvm/utils/gn/build/sync_source_lists_from_cmake.py` to print a report -of which files need to be added to or removed from `BUILD.gn` files to -match the corresponding `CMakeLists.txt`. You have to manually read the output +Run ``llvm/utils/gn/build/sync_source_lists_from_cmake.py`` to print a report +of which files need to be added to or removed from ``BUILD.gn`` files to +match the corresponding ``CMakeLists.txt``. You have to manually read the output of the script and implement its suggestions. -If new `CMakeLists.txt` files have been added, you have to manually create -a new corresponding `BUILD.gn` file below `llvm/utils/gn/secondary/`. +If new ``CMakeLists.txt`` files have been added, you have to manually create +a new corresponding ``BUILD.gn`` file below ``llvm/utils/gn/secondary/``. -If the dependencies in a `CMakeLists.txt` file have been changed, you have to +If the dependencies in a ``CMakeLists.txt`` file have been changed, you have to manually analyze and fix. .. _Philosophy: @@ -133,9 +130,9 @@ configure is used for three classes of feature checks: config.h in a build step). For the last two points, it would be nice if LLVM didn't have a single -`config.h` header, but one header per toggle. That way, when e.g. -`llvm_enable_terminfo` is toggled, only the 3 files caring about that setting -would need to be rebuilt, instead of everything including `config.h`. +``config.h`` header, but one header per toggle. That way, when e.g. +``llvm_enable_terminfo`` is toggled, only the 3 files caring about that setting +would need to be rebuilt, instead of everything including ``config.h``. GN doesn't believe in users setting arbitrary cflags from an environment variable, it wants the build to be controlled by .gn files. diff --git a/utils/gn/get.py b/utils/gn/get.py index 4015d5986c9d..c39649df78a4 100755 --- a/utils/gn/get.py +++ b/utils/gn/get.py @@ -3,27 +3,20 @@ from __future__ import print_function +import io import os import urllib2 import sys -import tempfile import zipfile -def download_url(url, output_file): - """Download url into output_file.""" +def download_and_unpack(url, output_dir, gn): + """Download an archive from url and extract gn from it into output_dir.""" print('downloading %s ...' % url, end='') sys.stdout.flush() - output_file.write(urllib2.urlopen(url).read()) + data = urllib2.urlopen(url).read() print(' done') - - -def download_and_unpack(url, output_dir, gn): - """Download an archive from url and extract gn from it into output_dir.""" - with tempfile.TemporaryFile() as f: - download_url(url, f) - f.seek(0) - zipfile.ZipFile(f).extract(gn, path=output_dir) + zipfile.ZipFile(io.BytesIO(data)).extract(gn, path=output_dir) def set_executable_bit(path): diff --git a/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn b/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn index 2b0bb2b7c05d..415e0fc7f29d 100644 --- a/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn +++ b/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn @@ -28,6 +28,7 @@ static_library("clang-tidy") { sources = [ "ClangTidy.cpp", + "ClangTidyCheck.cpp", "ClangTidyDiagnosticConsumer.cpp", "ClangTidyModule.cpp", "ClangTidyOptions.cpp", diff --git a/utils/release/merge-request.sh b/utils/release/merge-request.sh index 6a4ee16d788a..0a2bf7661fac 100755 --- a/utils/release/merge-request.sh +++ b/utils/release/merge-request.sh @@ -101,7 +101,7 @@ case $stable_version in release_metabug="39106" ;; 8.0) - release_metabug="40331" + release_metabug="41221" ;; *) echo "error: invalid stable version"