diff --git a/docs/CommandGuide/FileCheck.rst b/docs/CommandGuide/FileCheck.rst index 55c7f31f63b8..bc216be35df6 100644 --- a/docs/CommandGuide/FileCheck.rst +++ b/docs/CommandGuide/FileCheck.rst @@ -593,13 +593,13 @@ For example: The above example would match the line: -.. code-block:: llvm +.. code-block:: gas add r5, r5, r6 but would not match the line: -.. code-block:: llvm +.. code-block:: gas add r5, r5, r7 diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index febc1d5fe9be..770fdf9054c1 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -4316,16 +4316,22 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI))) return Op0; - // With nnan: (+/-0.0 - X) + X --> 0.0 (and commuted variant) + // With nnan: -X + X --> 0.0 (and commuted variant) // We don't have to explicitly exclude infinities (ninf): INF + -INF == NaN. // Negative zeros are allowed because we always end up with positive zero: // X = -0.0: (-0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0 // X = -0.0: ( 0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0 // X = 0.0: (-0.0 - ( 0.0)) + ( 0.0) == (-0.0) + ( 0.0) == 0.0 // X = 0.0: ( 0.0 - ( 0.0)) + ( 0.0) == ( 0.0) + ( 0.0) == 0.0 - if (FMF.noNaNs() && (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) || - match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0))))) - return ConstantFP::getNullValue(Op0->getType()); + if (FMF.noNaNs()) { + if (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) || + match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0)))) + return ConstantFP::getNullValue(Op0->getType()); + + if (match(Op0, m_FNeg(m_Specific(Op1))) || + match(Op1, m_FNeg(m_Specific(Op0)))) + return ConstantFP::getNullValue(Op0->getType()); + } // (X - Y) + Y --> X // Y + (X - Y) --> X diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8fe6f721584b..27da26446ee4 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19830,6 +19830,8 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { return false; } + // Try to prove that there is aliasing, or that there is no aliasing. Either + // way, we can return now. If nothing can be proved, proceed with more tests. 
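// NOTE (annotation, not part of the patch): as the new comment above says, this is a
// tri-state query. The call below returns true when it can prove the answer either
// way and writes the verdict into its IsAlias out-parameter; a false return means
// "could not decide", and control falls through to the remaining, more expensive checks.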
bool IsAlias; if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes, DAG, IsAlias)) diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index aea59301079e..208950d7ab71 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -4418,9 +4418,9 @@ APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { return; } if (usesLayout(Semantics)) { - const fltSemantics IEEESemantics = F.getSemantics(); - new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), IEEESemantics), - APFloat(semIEEEdouble)); + new (&Double) + DoubleAPFloat(Semantics, APFloat(std::move(F), F.getSemantics()), + APFloat(semIEEEdouble)); return; } llvm_unreachable("Unexpected semantics"); diff --git a/lib/Target/AMDGPU/AMDGPUCallingConv.td b/lib/Target/AMDGPU/AMDGPUCallingConv.td index deb2bd8fbdbc..8389058e3f73 100644 --- a/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -23,7 +23,16 @@ def CC_SI : CallingConv<[ SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31, - SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39 + SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39, + SGPR40, SGPR41, SGPR42, SGPR43, SGPR44, SGPR45, SGPR46, SGPR47, + SGPR48, SGPR49, SGPR50, SGPR51, SGPR52, SGPR53, SGPR54, SGPR55, + SGPR56, SGPR57, SGPR58, SGPR59, SGPR60, SGPR61, SGPR62, SGPR63, + SGPR64, SGPR65, SGPR66, SGPR67, SGPR68, SGPR69, SGPR70, SGPR71, + SGPR72, SGPR73, SGPR74, SGPR75, SGPR76, SGPR77, SGPR78, SGPR79, + SGPR80, SGPR81, SGPR82, SGPR83, SGPR84, SGPR85, SGPR86, SGPR87, + SGPR88, SGPR89, SGPR90, SGPR91, SGPR92, SGPR93, SGPR94, SGPR95, + SGPR96, SGPR97, SGPR98, SGPR99, SGPR100, SGPR101, SGPR102, SGPR103, + SGPR104, SGPR105 ]>>>, // We have no way of referring to the generated register tuples @@ -59,7 +68,16 @@ def RetCC_SI_Shader : CallingConv<[ SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31, - SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39 + SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39, + SGPR40, SGPR41, SGPR42, SGPR43, SGPR44, SGPR45, SGPR46, SGPR47, + SGPR48, SGPR49, SGPR50, SGPR51, SGPR52, SGPR53, SGPR54, SGPR55, + SGPR56, SGPR57, SGPR58, SGPR59, SGPR60, SGPR61, SGPR62, SGPR63, + SGPR64, SGPR65, SGPR66, SGPR67, SGPR68, SGPR69, SGPR70, SGPR71, + SGPR72, SGPR73, SGPR74, SGPR75, SGPR76, SGPR77, SGPR78, SGPR79, + SGPR80, SGPR81, SGPR82, SGPR83, SGPR84, SGPR85, SGPR86, SGPR87, + SGPR88, SGPR89, SGPR90, SGPR91, SGPR92, SGPR93, SGPR94, SGPR95, + SGPR96, SGPR97, SGPR98, SGPR99, SGPR100, SGPR101, SGPR102, SGPR103, + SGPR104, SGPR105 ]>>, // 32*4 + 4 is the minimum for a fetch shader with 32 outputs. 
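Note: SGPR0-SGPR105 in the extended lists above is 106 allocatable SGPRs. The AMDGPUISelLowering.cpp hunk below hands out 64-bit tuples from SGPR_64RegClass, and 106 / 2 = 53 aligned pairs, which is where the new count of 53 comes from (the old count of 20 pairs matched the previous SGPR0-SGPR39 limit).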
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 409fbfa22f38..1f813ef412e5 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -63,9 +63,9 @@ static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT, case MVT::v2f32: case MVT::v4i16: case MVT::v4f16: { - // Up to SGPR0-SGPR39 + // Up to SGPR0-SGPR105 return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, - &AMDGPU::SGPR_64RegClass, 20); + &AMDGPU::SGPR_64RegClass, 53); } default: return false; diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 252b98d1995f..48eba2246c57 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -978,21 +978,27 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r, FeatureHasSlowFPVMLx, FeatureAvoidPartialCPSR]>; -def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m, +def : ProcessorModel<"cortex-m3", CortexM4Model, [ARMv7m, ProcM3, FeaturePrefLoopAlign32, + FeatureUseMISched, + FeatureUseAA, FeatureHasNoBranchPredictor]>; -def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m, +def : ProcessorModel<"sc300", CortexM4Model, [ARMv7m, ProcM3, + FeatureUseMISched, + FeatureUseAA, FeatureHasNoBranchPredictor]>; -def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em, +def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em, FeatureVFP4, FeatureVFPOnlySP, FeatureD16, FeaturePrefLoopAlign32, FeatureHasSlowFPVMLx, + FeatureUseMISched, + FeatureUseAA, FeatureHasNoBranchPredictor]>; def : ProcNoItin<"cortex-m7", [ARMv7em, @@ -1002,22 +1008,26 @@ def : ProcNoItin<"cortex-m7", [ARMv7em, def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, FeatureNoMovt]>; -def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline, +def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline, FeatureDSP, FeatureFPARMv8, FeatureD16, FeatureVFPOnlySP, FeaturePrefLoopAlign32, FeatureHasSlowFPVMLx, + FeatureUseMISched, + FeatureUseAA, FeatureHasNoBranchPredictor]>; -def : ProcessorModel<"cortex-m35p", CortexM3Model, [ARMv8mMainline, +def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline, FeatureDSP, FeatureFPARMv8, FeatureD16, FeatureVFPOnlySP, FeaturePrefLoopAlign32, FeatureHasSlowFPVMLx, + FeatureUseMISched, + FeatureUseAA, FeatureHasNoBranchPredictor]>; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 94f94d3fa320..643d2806c521 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1184,7 +1184,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setStackPointerRegisterToSaveRestore(ARM::SP); if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() || - !Subtarget->hasVFP2()) + !Subtarget->hasVFP2() || Subtarget->hasMinSize()) setSchedulingPreference(Sched::RegPressure); else setSchedulingPreference(Sched::Hybrid); diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 8337aaadb9b1..cfeb13c6acb6 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -663,7 +663,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 10 in def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, "ldr", "\t$Rt, $addr", [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>, - T1Encoding<{0,1,0,0,1,?}> { + T1Encoding<{0,1,0,0,1,?}>, Sched<[WriteLd]> { // A6.2 & A8.6.59 bits<3> Rt; bits<8> addr; @@ -677,7 +677,7 @@ let canFoldAsLoad = 1 in def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins 
t_addrmode_sp:$addr), IIC_iLoad_i, "ldr", "\t$Rt, $addr", [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>, - T1LdStSP<{1,?,?}> { + T1LdStSP<{1,?,?}>, Sched<[WriteLd]> { bits<3> Rt; bits<8> addr; let Inst{10-8} = Rt; @@ -728,39 +728,39 @@ multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc, defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr, t_addrmode_is4, AddrModeT1_4, IIC_iLoad_r, IIC_iLoad_i, "ldr", - load>; + load>, Sched<[WriteLd]>; // A8.6.64 & A8.6.61 defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr, t_addrmode_is1, AddrModeT1_1, IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb", - zextloadi8>; + zextloadi8>, Sched<[WriteLd]>; // A8.6.76 & A8.6.73 defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr, t_addrmode_is2, AddrModeT1_2, IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh", - zextloadi16>; + zextloadi16>, Sched<[WriteLd]>; let AddedComplexity = 10 in def tLDRSB : // A8.6.80 T1pILdStEncode<0b011, (outs tGPR:$Rt), (ins t_addrmode_rr_sext:$addr), AddrModeT1_1, IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr", - [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr_sext:$addr))]>; + [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr_sext:$addr))]>, Sched<[WriteLd]>; let AddedComplexity = 10 in def tLDRSH : // A8.6.84 T1pILdStEncode<0b111, (outs tGPR:$Rt), (ins t_addrmode_rr_sext:$addr), AddrModeT1_2, IIC_iLoad_bh_r, "ldrsh", "\t$Rt, $addr", - [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr_sext:$addr))]>; + [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr_sext:$addr))]>, Sched<[WriteLd]>; def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, "str", "\t$Rt, $addr", [(store tGPR:$Rt, t_addrmode_sp:$addr)]>, - T1LdStSP<{0,?,?}> { + T1LdStSP<{0,?,?}>, Sched<[WriteST]> { bits<3> Rt; bits<8> addr; let Inst{10-8} = Rt; @@ -771,19 +771,19 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr, t_addrmode_is4, AddrModeT1_4, IIC_iStore_r, IIC_iStore_i, "str", - store>; + store>, Sched<[WriteST]>; // A8.6.197 & A8.6.195 defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr, t_addrmode_is1, AddrModeT1_1, IIC_iStore_bh_r, IIC_iStore_bh_i, "strb", - truncstorei8>; + truncstorei8>, Sched<[WriteST]>; // A8.6.207 & A8.6.205 defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr, t_addrmode_is2, AddrModeT1_2, IIC_iStore_bh_r, IIC_iStore_bh_i, "strh", - truncstorei16>; + truncstorei16>, Sched<[WriteST]>; //===----------------------------------------------------------------------===// @@ -843,7 +843,7 @@ let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1, def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops), IIC_iPop, "pop${p}\t$regs", []>, - T1Misc<{1,1,0,?,?,?,?}> { + T1Misc<{1,1,0,?,?,?,?}>, Sched<[WriteLd]> { bits<16> regs; let Inst{8} = regs{15}; let Inst{7-0} = regs{7-0}; @@ -853,7 +853,7 @@ let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops), IIC_iStore_m, "push${p}\t$regs", []>, - T1Misc<{0,1,0,?,?,?,?}> { + T1Misc<{0,1,0,?,?,?,?}>, Sched<[WriteST]> { bits<16> regs; let Inst{8} = regs{14}; let Inst{7-0} = regs{7-0}; @@ -1214,7 +1214,7 @@ def tMUL : // A8.6.105 T1 Thumb1sI<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), AddrModeNone, 2, IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", "$Rm = $Rd", [(set tGPR:$Rd, (mul tGPR:$Rn, tGPR:$Rm))]>, - T1DataProcessing<0b1101> { + T1DataProcessing<0b1101>, Sched<[WriteMUL32, ReadMUL, ReadMUL]> { bits<3> Rd; bits<3> Rn; let Inst{5-3} = Rn; diff --git
a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 2909d03cca0e..234b2767494d 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1333,7 +1334,8 @@ def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>, + Sched<[WriteLd]>; def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), @@ -2331,14 +2332,14 @@ class T2SatI<dag iops, string opc, string asm> def t2SSAT: T2SatI<(ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), "ssat", "\t$Rd, $sat_imm, $Rn$sh">, - Requires<[IsThumb2]> { + Requires<[IsThumb2]>, Sched<[WriteALU]> { let Inst{23-22} = 0b00; let Inst{5} = 0; } def t2SSAT16: T2SatI<(ins imm1_16:$sat_imm, rGPR:$Rn), "ssat16", "\t$Rd, $sat_imm, $Rn">, - Requires<[IsThumb2, HasDSP]> { + Requires<[IsThumb2, HasDSP]>, Sched<[WriteALU]> { let Inst{23-22} = 0b00; let sh = 0b100000; let Inst{4} = 0; @@ -2346,13 +2347,13 @@ def t2USAT: T2SatI<(ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), "usat", "\t$Rd, $sat_imm, $Rn$sh">, - Requires<[IsThumb2]> { + Requires<[IsThumb2]>, Sched<[WriteALU]> { let Inst{23-22} = 0b10; } def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn), "usat16", "\t$Rd, $sat_imm, $Rn">, - Requires<[IsThumb2, HasDSP]> { + Requires<[IsThumb2, HasDSP]>, Sched<[WriteALU]> { let Inst{23-22} = 0b10; let sh = 0b100000; let Inst{4} = 0; @@ -2476,7 +2477,7 @@ class T2TwoRegBitFI<dag oops, dag iops, InstrItinClass itin, def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm), IIC_iUNAsi, "bfc", "\t$Rd, $imm", - [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> { + [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]>, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{26} = 0; // should be 0. let Inst{25} = 1; @@ -2492,7 +2493,7 @@ def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm), def t2SBFX: T2TwoRegBitFI< (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb), - IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> { + IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []>, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-20} = 0b10100; @@ -2501,7 +2502,7 @@ def t2UBFX: T2TwoRegBitFI< (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb), - IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> { + IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []>, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-20} = 0b11100; @@ -2527,7 +2528,7 @@ let Constraints = "$src = $Rd" in { (ins rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm), IIC_iBITi, "bfi", "\t$Rd, $Rn, $imm", [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn, - bf_inv_mask_imm:$imm))]> { + bf_inv_mask_imm:$imm))]>, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{26} = 0; // should be 0.
let Inst{25} = 1; @@ -3281,17 +3282,17 @@ def t2LDREXB : T2I_ldrex<0b0100, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexb", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]>; def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexh", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]>; def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr), AddrModeT2_ldrex, 4, NoItinerary, "ldrex", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]> { + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]> { bits<4> Rt; bits<12> addr; let Inst{31-27} = 0b11101; @@ -3307,7 +3308,7 @@ def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2), AddrModeNone, 4, NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}>, - Requires<[IsThumb2, IsNotMClass]> { + Requires<[IsThumb2, IsNotMClass]>, Sched<[WriteLd]> { bits<4> Rt2; let Inst{11-8} = Rt2; } @@ -3315,17 +3316,17 @@ def t2LDAEXB : T2I_ldrex<0b1100, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaexb", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>; + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]>; def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaexh", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>; + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]>; def t2LDAEX : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaex", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> { + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]> { bits<4> Rt; bits<4> addr; let Inst{31-27} = 0b11101; @@ -3341,7 +3342,7 @@ def t2LDAEXD : T2I_ldrex<0b1111, (outs rGPR:$Rt, rGPR:$Rt2), AddrModeNone, 4, NoItinerary, "ldaexd", "\t$Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}>, Requires<[IsThumb, - HasAcquireRelease, HasV7Clrex, IsNotMClass]> { + HasAcquireRelease, HasV7Clrex, IsNotMClass]>, Sched<[WriteLd]> { bits<4> Rt2; let Inst{11-8} = Rt2; @@ -3356,14 +3357,14 @@ def t2STREXB : T2I_strex<0b0100, (outs rGPR:$Rd), "strexb", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (strex_1 rGPR:$Rt, addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]>; def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexh", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (strex_2 rGPR:$Rt, addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]>; def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_imm0_1020s4:$addr), @@ -3371,7 +3372,7 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, "strex", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]> { + Requires<[IsThumb, HasV8MBaseline]>, 
Sched<[WriteST]> { bits<4> Rd; bits<4> Rt; bits<12> addr; @@ -3388,7 +3389,7 @@ def t2STREXD : T2I_strex<0b0111, (outs rGPR:$Rd), AddrModeNone, 4, NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}>, - Requires<[IsThumb2, IsNotMClass]> { + Requires<[IsThumb2, IsNotMClass]>, Sched<[WriteST]> { bits<4> Rt2; let Inst{11-8} = Rt2; } @@ -3399,7 +3400,7 @@ def t2STLEXB : T2I_strex<0b1100, (outs rGPR:$Rd), [(set rGPR:$Rd, (stlex_1 rGPR:$Rt, addr_offset_none:$addr))]>, Requires<[IsThumb, HasAcquireRelease, - HasV7Clrex]>; + HasV7Clrex]>, Sched<[WriteST]>; def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), @@ -3408,7 +3409,7 @@ def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd), [(set rGPR:$Rd, (stlex_2 rGPR:$Rt, addr_offset_none:$addr))]>, Requires<[IsThumb, HasAcquireRelease, - HasV7Clrex]>; + HasV7Clrex]>, Sched<[WriteST]>; def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), @@ -3416,7 +3417,8 @@ def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, "stlex", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (stlex_4 rGPR:$Rt, addr_offset_none:$addr))]>, - Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> { + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, + Sched<[WriteST]> { bits<4> Rd; bits<4> Rt; bits<4> addr; @@ -3433,7 +3435,7 @@ def t2STLEXD : T2I_strex<0b1111, (outs rGPR:$Rd), AddrModeNone, 4, NoItinerary, "stlexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}>, Requires<[IsThumb, HasAcquireRelease, - HasV7Clrex, IsNotMClass]> { + HasV7Clrex, IsNotMClass]>, Sched<[WriteST]> { bits<4> Rt2; let Inst{11-8} = Rt2; } diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 4e848406694b..ce74d325c4e5 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -424,4 +424,4 @@ include "ARMScheduleA9.td" include "ARMScheduleSwift.td" include "ARMScheduleR52.td" include "ARMScheduleA57.td" -include "ARMScheduleM3.td" +include "ARMScheduleM4.td" diff --git a/lib/Target/ARM/ARMScheduleM3.td b/lib/Target/ARM/ARMScheduleM3.td deleted file mode 100644 index 325e28977ee1..000000000000 --- a/lib/Target/ARM/ARMScheduleM3.td +++ /dev/null @@ -1,20 +0,0 @@ -//=- ARMScheduleM3.td - ARM Cortex-M3 Scheduling Definitions -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for the ARM Cortex-M3 processor. -// -//===----------------------------------------------------------------------===// - -def CortexM3Model : SchedMachineModel { - let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue - let MicroOpBufferSize = 0; // In-order - let LoadLatency = 2; // Latency when not pipelined, not pc-relative - let MispredictPenalty = 2; // Best case branch taken cost - - let CompleteModel = 0; -} diff --git a/lib/Target/ARM/ARMScheduleM4.td b/lib/Target/ARM/ARMScheduleM4.td new file mode 100644 index 000000000000..38c8ea2b4f35 --- /dev/null +++ b/lib/Target/ARM/ARMScheduleM4.td @@ -0,0 +1,119 @@ +//==- ARMScheduleM4.td - Cortex-M4 Scheduling Definitions -*- tablegen -*-====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the SchedRead/Write data for the ARM Cortex-M4 processor. +// +//===----------------------------------------------------------------------===// + +def CortexM4Model : SchedMachineModel { + let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue + let MicroOpBufferSize = 0; // In-order + let LoadLatency = 2; // Latency when not pipelined, not pc-relative + let MispredictPenalty = 2; // Best case branch taken cost + let PostRAScheduler = 1; + + let CompleteModel = 0; +} + + +// We model the entire cpu as a single pipeline with a BufferSize = 0 since +// Cortex-M4 is in-order. + +def M4Unit : ProcResource<1> { let BufferSize = 0; } + + +let SchedModel = CortexM4Model in { + +// Some definitions of latencies we apply to different instructions + +class M4UnitL1 : WriteRes { let Latency = 1; } +class M4UnitL2 : WriteRes { let Latency = 2; } +class M4UnitL3 : WriteRes { let Latency = 3; } +class M4UnitL14 : WriteRes { let Latency = 14; } +def M4UnitL1_wr : SchedWriteRes<[M4Unit]> { let Latency = 1; } +def M4UnitL2_wr : SchedWriteRes<[M4Unit]> { let Latency = 2; } +class M4UnitL1I : InstRW<[M4UnitL1_wr], instr>; +class M4UnitL2I : InstRW<[M4UnitL2_wr], instr>; + + +// Loads, MAC's and DIV all get a higher latency of 2 +def : M4UnitL2; +def : M4UnitL2; +def : M4UnitL2; +def : M4UnitL2; +def : M4UnitL2; +def : M4UnitL2; + +def : M4UnitL2I<(instregex "(t|t2)LDM")>; + + +// Stores we use a latency of 1 as they have no outputs + +def : M4UnitL1; +def : M4UnitL1I<(instregex "(t|t2)STM")>; + + +// Everything else has a Latency of 1 + +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1I<(instregex "(t|t2)MOV")>; +def : M4UnitL1I<(instrs COPY)>; +def : M4UnitL1I<(instregex "t2IT")>; +def : M4UnitL1I<(instregex "t2SEL", "t2USAD8", + "t2(S|Q|SH|U|UQ|UH)(ADD16|ASX|SAX|SUB16|ADD8|SUB8)", "t2USADA8", "(t|t2)REV")>; + +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Most FP instructions are single-cycle latency, except MAC's, Div's and Sqrt's. +// Loads still take 2 cycles. + +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL2I<(instregex "VLD")>; +def : M4UnitL1I<(instregex "VST")>; +def : M4UnitL3; +def : M4UnitL3; +def : M4UnitL14; +def : M4UnitL14; +def : M4UnitL14; +def : M4UnitL14; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; + +def : ReadAdvance; +def : ReadAdvance; + +} diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 22652d6256b9..63f694199f44 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -361,6 +361,13 @@ unsigned ARMSubtarget::getMispredictionPenalty() const { } bool ARMSubtarget::enableMachineScheduler() const { + // The MachineScheduler can increase register usage, so we use more high + // registers and end up with more T2 instructions that cannot be converted to + // T1 instructions. 
At least until we do better at converting to thumb1 + // instructions, on cortex-m at Oz where we are size-paranoid, don't use the + // Machine scheduler, relying on the DAG register pressure scheduler instead. + if (isMClass() && hasMinSize()) + return false; // Enable the MachineScheduler before register allocation for subtargets // with the use-misched feature. return useMachineScheduler(); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index ac49a0eeb730..ddeec03ba784 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -85,7 +85,7 @@ static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI, MCAsmInfo *MAI = new MipsMCAsmInfo(TT); unsigned SP = MRI.getDwarfRegNum(Mips::SP, true); - MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, SP, 0); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfaRegister(nullptr, SP); MAI->addInitialFrameState(Inst); return MAI; diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 123c8b9630cb..7cb955d03ff4 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -1174,7 +1174,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { } if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) - TryToUnfoldSelect(SI, BB); + if (TryToUnfoldSelect(SI, BB)) + return true; // Check for some cases that are worth simplifying. Right now we want to look // for loads that are used by a switch or by the condition for the branch. If diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 83f1c70f4cf2..ce03b854ffd5 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1169,6 +1169,18 @@ class LoopVectorizationCostModel { return foldTailByMasking() || Legal->blockNeedsPredication(BB); } + /// Estimate cost of an intrinsic call instruction CI if it were vectorized + /// with factor VF. Return the cost of the instruction, including + /// scalarization overhead if it's needed. + unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF); + + /// Estimate cost of a call instruction CI if it were vectorized with factor + /// VF. Return the cost of the instruction, including scalarization overhead + /// if it's needed. The flag NeedToScalarize shows if the call needs to be + /// scalarized - + // i.e. either vector version isn't available, or is too expensive. + unsigned getVectorCallCost(CallInst *CI, unsigned VF, bool &NeedToScalarize); + private: unsigned NumPredStores = 0; @@ -1221,6 +1233,10 @@ class LoopVectorizationCostModel { /// element) unsigned getUniformMemOpCost(Instruction *I, unsigned VF); + /// Estimate the overhead of scalarizing an instruction. This is a + /// convenience wrapper for the type-based getScalarizationOverhead API. + unsigned getScalarizationOverhead(Instruction *I, unsigned VF); + /// Returns whether the instruction is a load or store and will be a emitted /// as a vector operation. bool isConsecutiveLoadOrStore(Instruction *I); @@ -3057,45 +3073,9 @@ static void cse(BasicBlock *BB) { } } -/// Estimate the overhead of scalarizing an instruction. This is a -/// convenience wrapper for the type-based getScalarizationOverhead API.
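// NOTE (annotation, not part of the patch): this removal and the hunks that follow
// convert three file-local helpers - getScalarizationOverhead, getVectorCallCost and
// getVectorIntrinsicCost - into members of LoopVectorizationCostModel, so they can use
// the model's stored TTI/TLI instead of having both threaded through every call; all
// call sites below drop the extra TTI/TLI arguments accordingly.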
-static unsigned getScalarizationOverhead(Instruction *I, unsigned VF, - const TargetTransformInfo &TTI) { - if (VF == 1) - return 0; - - unsigned Cost = 0; - Type *RetTy = ToVectorTy(I->getType(), VF); - if (!RetTy->isVoidTy() && - (!isa<StoreInst>(I) || - !TTI.supportsEfficientVectorElementLoadStore())) - Cost += TTI.getScalarizationOverhead(RetTy, true, false); - - // Some targets keep addresses scalar. - if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing()) - return Cost; - - if (CallInst *CI = dyn_cast<CallInst>(I)) { - SmallVector<const Value *, 4> Operands(CI->arg_operands()); - Cost += TTI.getOperandsScalarizationOverhead(Operands, VF); - } - else if (!isa<StoreInst>(I) || - !TTI.supportsEfficientVectorElementLoadStore()) { - SmallVector<const Value *, 4> Operands(I->operand_values()); - Cost += TTI.getOperandsScalarizationOverhead(Operands, VF); - } - - return Cost; -} - -// Estimate cost of a call instruction CI if it were vectorized with factor VF. -// Return the cost of the instruction, including scalarization overhead if it's -// needed. The flag NeedToScalarize shows if the call needs to be scalarized - -// i.e. either vector version isn't available, or is too expensive. -static unsigned getVectorCallCost(CallInst *CI, unsigned VF, - const TargetTransformInfo &TTI, - const TargetLibraryInfo *TLI, - bool &NeedToScalarize) { +unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI, + unsigned VF, + bool &NeedToScalarize) { Function *F = CI->getCalledFunction(); StringRef FnName = CI->getCalledFunction()->getName(); Type *ScalarRetTy = CI->getType(); @@ -3118,7 +3098,7 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF, // Compute costs of unpacking argument values for the scalar calls and // packing the return values to a vector. - unsigned ScalarizationCost = getScalarizationOverhead(CI, VF, TTI); + unsigned ScalarizationCost = getScalarizationOverhead(CI, VF); unsigned Cost = ScalarCallCost * VF + ScalarizationCost; @@ -3137,12 +3117,8 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF, return Cost; } -// Estimate cost of an intrinsic call instruction CI if it were vectorized with -// factor VF. Return the cost of the instruction, including scalarization -// overhead if it's needed. -static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF, - const TargetTransformInfo &TTI, - const TargetLibraryInfo *TLI) { +unsigned LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI, + unsigned VF) { Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); assert(ID && "Expected intrinsic call!"); @@ -4126,9 +4102,9 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { // version of the instruction. // Is it beneficial to perform intrinsic call compared to lib call? bool NeedToScalarize; - unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize); + unsigned CallCost = Cost->getVectorCallCost(CI, VF, NeedToScalarize); bool UseVectorIntrinsic = - ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost; + ID && Cost->getVectorIntrinsicCost(CI, VF) <= CallCost; assert((UseVectorIntrinsic || !NeedToScalarize) && "Instruction should be scalarized elsewhere."); @@ -5522,7 +5498,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, // Get the overhead of the extractelement and insertelement instructions // we might create due to scalarization. - Cost += getScalarizationOverhead(I, VF, TTI); + Cost += getScalarizationOverhead(I, VF); // If we have a predicated store, it may not be executed for each vector // lane.
Scale the cost by the probability of executing the predicated @@ -5674,6 +5650,34 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { return VectorizationCostTy(C, TypeNotScalarized); } +unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I, + unsigned VF) { + + if (VF == 1) + return 0; + + unsigned Cost = 0; + Type *RetTy = ToVectorTy(I->getType(), VF); + if (!RetTy->isVoidTy() && + (!isa<StoreInst>(I) || !TTI.supportsEfficientVectorElementLoadStore())) + Cost += TTI.getScalarizationOverhead(RetTy, true, false); + + // Some targets keep addresses scalar. + if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing()) + return Cost; + + if (CallInst *CI = dyn_cast<CallInst>(I)) { + SmallVector<const Value *, 4> Operands(CI->arg_operands()); + Cost += TTI.getOperandsScalarizationOverhead(Operands, VF); + } else if (!isa<StoreInst>(I) || + !TTI.supportsEfficientVectorElementLoadStore()) { + SmallVector<const Value *, 4> Operands(I->operand_values()); + Cost += TTI.getOperandsScalarizationOverhead(Operands, VF); + } + + return Cost; +} + void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) { if (VF == 1) return; @@ -5914,7 +5918,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, // The cost of insertelement and extractelement instructions needed for // scalarization. - Cost += getScalarizationOverhead(I, VF, TTI); + Cost += getScalarizationOverhead(I, VF); // Scale the cost by the probability of executing the predicated blocks. // This assumes the predicated block for each vector lane is equally @@ -6035,16 +6039,16 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, case Instruction::Call: { bool NeedToScalarize; CallInst *CI = cast<CallInst>(I); - unsigned CallCost = getVectorCallCost(CI, VF, TTI, TLI, NeedToScalarize); + unsigned CallCost = getVectorCallCost(CI, VF, NeedToScalarize); if (getVectorIntrinsicIDForCall(CI, TLI)) - return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI)); + return std::min(CallCost, getVectorIntrinsicCost(CI, VF)); return CallCost; } default: // The cost of executing VF copies of the scalar instruction. This opcode // is unknown. Assume that it is the same as 'mul'. return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy) + - getScalarizationOverhead(I, VF, TTI); + getScalarizationOverhead(I, VF); } // end of switch. } @@ -6638,9 +6642,9 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB, // version of the instruction. // Is it beneficial to perform intrinsic call compared to lib call? bool NeedToScalarize; - unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize); + unsigned CallCost = CM.getVectorCallCost(CI, VF, NeedToScalarize); bool UseVectorIntrinsic = - ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost; + ID && CM.getVectorIntrinsicCost(CI, VF) <= CallCost; return UseVectorIntrinsic || !NeedToScalarize; } if (isa<LoadInst>(I) || isa<StoreInst>(I)) { @@ -6828,7 +6832,7 @@ LoopVectorizationPlanner::buildVPlanWithVPRecipes( VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); auto Plan = llvm::make_unique<VPlan>(VPBB); - VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, TTI, Legal, CM, Builder); + VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder); // Represent values that will have defs inside VPlan.
for (Value *V : NeedDef) Plan->addVPValue(V); diff --git a/lib/Transforms/Vectorize/VPRecipeBuilder.h b/lib/Transforms/Vectorize/VPRecipeBuilder.h index bc6b22120995..0ca6a6b93cfd 100644 --- a/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -29,9 +29,6 @@ class VPRecipeBuilder { /// Target Library Info. const TargetLibraryInfo *TLI; - /// Target Transform Info. - const TargetTransformInfo *TTI; - /// The legality analysis. LoopVectorizationLegality *Legal; @@ -104,11 +101,9 @@ class VPRecipeBuilder { public: VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, VPBuilder &Builder) - : OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), - Builder(Builder) {} + : OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), Builder(Builder) {} /// Check if a recipe can be create for \p I withing the given VF \p Range. /// If a recipe can be created, it adds it to \p VPBB. diff --git a/test/CodeGen/AMDGPU/sgpr-limit.ll b/test/CodeGen/AMDGPU/sgpr-limit.ll new file mode 100644 index 000000000000..364cfd880db7 --- /dev/null +++ b/test/CodeGen/AMDGPU/sgpr-limit.ll @@ -0,0 +1,265 @@ +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s + +; CHECK: s_add_i32 s0, s0, s1 +; CHECK: s_add_i32 s1, s0, s2 +; CHECK: s_add_i32 s2, s1, s3 +; CHECK: s_add_i32 s3, s2, s4 +; CHECK: s_add_i32 s4, s3, s5 +; CHECK: s_add_i32 s5, s4, s6 +; CHECK: s_add_i32 s6, s5, s7 +; CHECK: s_add_i32 s7, s6, s8 +; CHECK: s_add_i32 s8, s7, s9 +; CHECK: s_add_i32 s9, s8, s10 +; CHECK: s_add_i32 s10, s9, s11 +; CHECK: s_add_i32 s11, s10, s12 +; CHECK: s_add_i32 s12, s11, s13 +; CHECK: s_add_i32 s13, s12, s14 +; CHECK: s_add_i32 s14, s13, s15 +; CHECK: s_add_i32 s15, s14, s16 +; CHECK: s_add_i32 s16, s15, s17 +; CHECK: s_add_i32 s17, s16, s18 +; CHECK: s_add_i32 s18, s17, s19 +; CHECK: s_add_i32 s19, s18, s20 +; CHECK: s_add_i32 s20, s19, s21 +; CHECK: s_add_i32 s21, s20, s22 +; CHECK: s_add_i32 s22, s21, s23 +; CHECK: s_add_i32 s23, s22, s24 +; CHECK: s_add_i32 s24, s23, s25 +; CHECK: s_add_i32 s25, s24, s26 +; CHECK: s_add_i32 s26, s25, s27 +; CHECK: s_add_i32 s27, s26, s28 +; CHECK: s_add_i32 s28, s27, s29 +; CHECK: s_add_i32 s29, s28, s30 +; CHECK: s_add_i32 s30, s29, s31 +; CHECK: s_add_i32 s31, s30, s32 +; CHECK: s_add_i32 s32, s31, s33 +; CHECK: s_add_i32 s33, s32, s34 +; CHECK: s_add_i32 s34, s33, s35 +; CHECK: s_add_i32 s35, s34, s36 +; CHECK: s_add_i32 s36, s35, s37 +; CHECK: s_add_i32 s37, s36, s38 +; CHECK: s_add_i32 s38, s37, s39 +; CHECK: s_add_i32 s39, s38, s40 +; CHECK: s_add_i32 s40, s39, s41 +; CHECK: s_add_i32 s41, s40, s42 +; CHECK: s_add_i32 s42, s41, s43 +; CHECK: s_add_i32 s43, s42, s44 +; CHECK: s_add_i32 s44, s43, s45 +; CHECK: s_add_i32 s45, s44, s46 +; CHECK: s_add_i32 s46, s45, s47 +; CHECK: s_add_i32 s47, s46, s48 +; CHECK: s_add_i32 s48, s47, s49 +; CHECK: s_add_i32 s49, s48, s50 +; CHECK: s_add_i32 s50, s49, s51 +; CHECK: s_add_i32 s51, s50, s52 +; CHECK: s_add_i32 s52, s51, s53 +; CHECK: s_add_i32 s53, s52, s54 +; CHECK: s_add_i32 s54, s53, s55 +; CHECK: s_add_i32 s55, s54, s56 +; CHECK: s_add_i32 s56, s55, s57 +; CHECK: s_add_i32 s57, s56, s58 +; CHECK: s_add_i32 s58, s57, s59 +; CHECK: s_add_i32 s59, s58, s60 +; CHECK: s_add_i32 s60, s59, s61 +; CHECK: s_add_i32 
s61, s60, s62 +; CHECK: s_add_i32 s62, s61, s63 +define amdgpu_gs { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } @_amdgpu_gs_sgpr_limit_i32 (i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, <4 x i32> inreg) { +.entry: + %65 = add i32 %0, %1 + %66 = add i32 %65, %2 + %67 = add i32 %66, %3 + %68 = add i32 %67, %4 + %69 = add i32 %68, %5 + %70 = add i32 %69, %6 + %71 = add i32 %70, %7 + %72 = add i32 %71, %8 + %73 = add i32 %72, %9 + %74 = add i32 %73, %10 + %75 = add i32 %74, %11 + %76 = add i32 %75, %12 + %77 = add i32 %76, %13 + %78 = add i32 %77, %14 + %79 = add i32 %78, %15 + %80 = add i32 %79, %16 + %81 = add i32 %80, %17 + %82 = add i32 %81, %18 + %83 = add i32 %82, %19 + %84 = add i32 %83, %20 + %85 = add i32 %84, %21 + %86 = add i32 %85, %22 + %87 = add i32 %86, %23 + %88 = add i32 %87, %24 + %89 = add i32 %88, %25 + %90 = add i32 %89, %26 + %91 = add i32 %90, %27 + %92 = add i32 %91, %28 + %93 = add i32 %92, %29 + %94 = add i32 %93, %30 + %95 = add i32 %94, %31 + %96 = add i32 %95, %32 + %97 = add i32 %96, %33 + %98 = add i32 %97, %34 + %99 = add i32 %98, %35 + %100 = add i32 %99, %36 + %101 = add i32 %100, %37 + %102 = add i32 %101, %38 + %103 = add i32 %102, %39 + %104 = add i32 %103, %40 + %105 = add i32 %104, %41 + %106 = add i32 %105, %42 + %107 = add i32 %106, %43 + %108 = add i32 %107, %44 + %109 = add i32 %108, %45 + %110 = add i32 %109, %46 + %111 = add i32 %110, %47 + %112 = add i32 %111, %48 + %113 = add i32 %112, %49 + %114 = add i32 %113, %50 + %115 = add i32 %114, %51 + %116 = add i32 %115, %52 + %117 = add i32 %116, %53 + %118 = add i32 %117, %54 + %119 = add i32 %118, %55 + %120 = add i32 %119, %56 + %121 = add i32 %120, %57 + %122 = add i32 %121, %58 + %123 = add i32 %122, %59 + %124 = add i32 %123, %60 + %125 = add i32 %124, %61 + %126 = add i32 %125, %62 + %127 = add i32 %126, %63 +%128 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } undef, i32 %65, 0 +%129 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %128, i32 %66, 1 +%130 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %129, i32 %67, 2 +%131 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %130, i32 %68, 3 +%132 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %131, i32 %69, 4 +%133 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %132, i32 %70, 5 +%134 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %133, i32 %71, 6 +%135 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %134, i32 %72, 7 +%136 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %135, i32 %73, 8 +%137 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %136, i32 %74, 9 +%138 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %137, i32 %75, 10 +%139 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %138, i32 %76, 11 +%140 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %139, i32 %77, 12 +%141 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %140, i32 %78, 13 +%142 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %141, i32 %79, 14 +%143 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %142, i32 %80, 15 +%144 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %143, i32 %81, 16 +%145 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %144, i32 %82, 17 +%146 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %145, i32 %83, 18 +%147 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %146, i32 %84, 19 +%148 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %147, i32 %85, 20 +%149 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %148, i32 %86, 21 +%150 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %149, i32 %87, 22 +%151 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %150, i32 %88, 23 +%152 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %151, i32 %89, 24 +%153 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %152, i32 %90, 25 +%154 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %153, i32 %91, 26 +%155 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %154, i32 %92, 27 +%156 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %155, i32 %93, 28 +%157 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %156, i32 %94, 29 +%158 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %157, i32 %95, 30 +%159 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %158, i32 %96, 31 +%160 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %159, i32 %97, 32 +%161 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %160, i32 %98, 33 +%162 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %161, i32 %99, 34 +%163 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %162, i32 %100, 35 +%164 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %163, i32 %101, 36 +%165 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %164, i32 %102, 37 +%166 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %165, i32 %103, 38 +%167 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %166, i32 %104, 39 +%168 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %167, i32 %105, 40 +%169 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %168, i32 %106, 41 +%170 = insertvalue { i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %169, i32 %107, 42 +%171 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %170, i32 %108, 43 +%172 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %171, i32 %109, 44 +%173 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %172, i32 %110, 45 +%174 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %173, i32 %111, 46 +%175 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %174, i32 %112, 47 +%176 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %175, i32 %113, 48 +%177 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %176, i32 %114, 49 +%178 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %177, i32 %115, 50 +%179 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %178, i32 %116, 51 +%180 = insertvalue { i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %179, i32 %117, 52 +%181 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %180, i32 %118, 53 +%182 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %181, i32 %119, 54 +%183 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %182, i32 %120, 55 +%184 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %183, i32 %121, 56 +%185 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %184, i32 %122, 57 +%186 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %185, i32 %123, 58 +%187 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %186, i32 %124, 59 +%188 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %187, i32 %125, 60 +%189 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %188, i32 %126, 61 +%190 = insertvalue { 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %189, i32 %127, 62 + ret { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %190 +} + +; CHECK: s_xor_b64 s[0:1], s[0:1], s[2:3] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[4:5] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[6:7] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[8:9] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[10:11] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[12:13] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[14:15] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[16:17] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[18:19] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[20:21] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[22:23] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[24:25] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[26:27] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[28:29] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[30:31] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[32:33] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[34:35] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[36:37] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[38:39] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[40:41] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[42:43] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[44:45] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[46:47] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[48:49] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[50:51] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[52:53] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[54:55] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[56:57] +; CHECK: s_xor_b64 s[0:1], s[0:1], s[58:59] +define amdgpu_gs void @_amdgpu_gs_sgpr_limit_i64 (i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, <4 x i32> inreg %addr) { +.entry: + %31 = xor i64 %0, %1 + %32 = xor i64 %31, %2 + %33 = xor i64 %32, %3 + %34 = xor i64 %33, %4 + %35 = xor i64 %34, %5 + %36 = xor i64 %35, %6 + %37 = xor i64 %36, %7 + %38 = xor i64 %37, %8 + %39 = xor i64 %38, %9 + %40 = xor i64 %39, %10 + %41 = xor i64 %40, %11 + %42 = xor i64 %41, %12 + %43 = xor i64 %42, %13 + %44 = xor i64 %43, %14 + %45 = xor i64 %44, %15 + %46 = xor i64 %45, %16 + %47 = xor i64 %46, %17 + %48 = xor i64 %47, %18 + %49 = xor i64 %48, %19 + %50 = xor i64 %49, %20 + %51 = xor i64 %50, %21 + %52 = xor i64 %51, %22 + %53 = xor i64 %52, %23 + %54 = xor i64 %53, %24 + %55 = xor i64 %54, %25 + %56 = xor i64 %55, %26 + %57 = xor i64 %56, %27 + %58 = xor i64 %57, %28 + %59 = xor i64 %58, %29 + %60 = bitcast i64 %59 to <2 x i32> + call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> %60, <4 x i32> %addr, i32 4, i32 0, i32 0) + ret void +} + +declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32) + diff --git a/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll b/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll index 524424a25e02..40fd39e6eac5 100644 --- a/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll 
+++ b/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll @@ -5,10 +5,10 @@ ; CHECK-LABEL: add_user ; CHECK: %for.body -; CHECK: ldr [[A:r[0-9]+]],{{.*}}, #2]! -; CHECK: ldr [[B:r[0-9]+]],{{.*}}, #2]! -; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]] +; CHECK: ldr [[A:[rl0-9]+]],{{.*}}, #2]! +; CHECK: ldr [[B:[rl0-9]+]],{{.*}}, #2]! ; CHECK: sxtah [[COUNT:r[0-9]+]], [[COUNT]], [[A]] +; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]] define i32 @add_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { entry: %cmp24 = icmp sgt i32 %arg, 0 @@ -53,10 +53,10 @@ for.body: ; CHECK-LABEL: mul_bottom_user ; CHECK: %for.body -; CHECK: ldr [[A:r[0-9]+]],{{.*}}, #2]! -; CHECK: ldr [[B:r[0-9]+]],{{.*}}, #2]! -; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]] +; CHECK: ldr [[A:[rl0-9]+]],{{.*}}, #2]! +; CHECK: ldr [[B:[rl0-9]+]],{{.*}}, #2]! ; CHECK: sxth [[SXT:r[0-9]+]], [[A]] +; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]] ; CHECK: mul [[COUNT:r[0-9]+]],{{.*}}[[SXT]] define i32 @mul_bottom_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { entry: @@ -104,8 +104,8 @@ for.body: ; CHECK: %for.body ; CHECK: ldr [[A:[rl0-9]+]],{{.*}}, #2]! ; CHECK: ldr [[B:[rl0-9]+]],{{.*}}, #2]! -; CHECK: smlad [[ACC:[rl0-9]+]], [[B]], [[A]], [[ACC]] -; CHECK: asr.w [[ASR:[rl0-9]+]], [[B]], #16 +; CHECK: asrs [[ASR:[rl0-9]+]], [[A]], #16 +; CHECK: smlad [[ACC:[rl0-9]+]], [[A]], [[B]], [[ACC]] ; CHECK: mul [[COUNT:[rl0-9]+]],{{.}}[[ASR]] define i32 @mul_top_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { entry: @@ -151,10 +151,10 @@ for.body: ; CHECK-LABEL: and_user ; CHECK: %for.body -; CHECK: ldr [[A:r[0-9]+]],{{.*}}, #2]! -; CHECK: ldr [[B:r[0-9]+]],{{.*}}, #2]! -; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]] +; CHECK: ldr [[A:[rl0-9]+]],{{.*}}, #2]! +; CHECK: ldr [[B:[rl0-9]+]],{{.*}}, #2]! ; CHECK: uxth [[UXT:r[0-9]+]], [[A]] +; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]] ; CHECK: mul [[MUL:r[0-9]+]],{{.*}}[[UXT]] define i32 @and_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { entry: @@ -201,12 +201,12 @@ for.body: ; CHECK-LABEL: multi_uses ; CHECK: %for.body -; CHECK: ldr [[A:r[0-9]+]], [{{.*}}, #2]! -; CHECK: ldr [[B:r[0-9]+]], [{{.*}}, #2]! -; CHECK: smlad [[ACC:[rl0-9]+]], [[B]], [[A]], [[ACC]] +; CHECK: ldr [[A:[rl0-9]+]], [{{.*}}, #2]! +; CHECK: ldr [[B:[rl0-9]+]], [{{.*}}, #2]! 
; CHECK: sxth [[SXT:r[0-9]+]], [[A]] +; CHECK: smlad [[ACC:[rl0-9]+]], [[B]], [[A]], [[ACC]] ; CHECK: eor.w [[EOR:r[0-9]+]], [[SXT]], [[SHIFT:r[0-9]+]] -; CHECK: mul [[MUL:r[0-9]+]],{{.*}}[[SXT]] +; CHECK: muls [[MUL:r[0-9]+]],{{.*}}[[SXT]] ; CHECK: lsl.w [[SHIFT]], [[MUL]], #16 define i32 @multi_uses(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { entry: diff --git a/test/CodeGen/ARM/aapcs-hfa-code.ll b/test/CodeGen/ARM/aapcs-hfa-code.ll index 5545dfdcd4c8..8d31485175af 100644 --- a/test/CodeGen/ARM/aapcs-hfa-code.ll +++ b/test/CodeGen/ARM/aapcs-hfa-code.ll @@ -76,8 +76,8 @@ define arm_aapcs_vfpcc void @test_1double_nosplit([4 x float], [4 x double], [3 ; CHECK-M4F-LABEL: test_1double_nosplit: ; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0 -; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 ; CHECK-M4F: movt [[ONEHI]], #16368 +; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 ; CHECK-M4F: strd [[ONELO]], [[ONEHI]], [sp] ; CHECK-M4F: bl test_1double_nosplit call arm_aapcs_vfpcc void @test_1double_nosplit([4 x float] undef, [4 x double] undef, [3 x float] undef, double 1.0) @@ -97,8 +97,8 @@ define arm_aapcs_vfpcc void @test_1double_misaligned([4 x double], [4 x double], ; CHECK-M4F-LABEL: test_1double_misaligned: ; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0 -; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 ; CHECK-M4F: movt [[ONEHI]], #16368 +; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 ; CHECK-M4F: strd [[ONELO]], [[ONEHI]], [sp, #8] ; CHECK-M4F: bl test_1double_misaligned diff --git a/test/CodeGen/ARM/useaa.ll b/test/CodeGen/ARM/useaa.ll index d7913e7bad90..076466d4d2c1 100644 --- a/test/CodeGen/ARM/useaa.ll +++ b/test/CodeGen/ARM/useaa.ll @@ -1,4 +1,6 @@ ; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-r52 | FileCheck %s --check-prefix=CHECK --check-prefix=USEAA +; RUN: llc < %s -mtriple=armv7m-eabi -mcpu=cortex-m4 | FileCheck %s --check-prefix=CHECK --check-prefix=USEAA +; RUN: llc < %s -mtriple=armv8m-eabi -mcpu=cortex-m33 | FileCheck %s --check-prefix=CHECK --check-prefix=USEAA ; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=generic | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; Check we use AA during codegen, so can interleave these loads/stores. diff --git a/test/CodeGen/Thumb2/ifcvt-no-branch-predictor.ll b/test/CodeGen/Thumb2/ifcvt-no-branch-predictor.ll index 0c5face6c039..b6b4805b97d9 100644 --- a/test/CodeGen/Thumb2/ifcvt-no-branch-predictor.ll +++ b/test/CodeGen/Thumb2/ifcvt-no-branch-predictor.ll @@ -100,10 +100,10 @@ if.end: ; CHECK-BP: str ; CHECK-BP: b ; CHECK-BP: str -; CHECK-BP: ldr +; CHECK-BP: add ; CHECK-NOBP: ittee ; CHECK-NOBP: streq -; CHECK-NOBP: ldreq +; CHECK-NOBP: addeq ; CHECK-NOBP: strne ; CHECK-NOBP: strne define i32 @diamond2(i32 %n, i32* %p, i32* %q) { @@ -119,7 +119,7 @@ if.then: if.else: store i32 %n, i32* %q, align 4 - %0 = load i32, i32* %p, align 4 + %0 = add i32 %n, 10 br label %if.end if.end: diff --git a/test/CodeGen/Thumb2/m4-sched-ldr.mir b/test/CodeGen/Thumb2/m4-sched-ldr.mir new file mode 100644 index 000000000000..41abefd85a62 --- /dev/null +++ b/test/CodeGen/Thumb2/m4-sched-ldr.mir @@ -0,0 +1,60 @@ +# RUN: llc %s -run-pass machine-scheduler -o - | FileCheck %s + +# CHECK-LABEL: bb.0. 
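+# The two t2LDRi12 loads below are independent of each other; with the
+# cortex-m4 scheduling model the machine scheduler is expected to hoist the
+# second load above the first t2ADDri, so both loads issue before either
+# dependent add (presumably to hide the load latency), as the CHECK lines
+# require.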
+# CHECK: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2ADDri +# CHECK-NEXT: t2ADDri +--- | + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv7em-arm-none-eabi" + + ; Function Attrs: norecurse nounwind optsize readonly + define dso_local i32 @test(i32* nocapture readonly %a, i32* nocapture readonly %b) local_unnamed_addr #0 { + entry: + %0 = load i32, i32* %a, align 4 + %add = add nsw i32 %0, 10 + %1 = load i32, i32* %b, align 4 + %add1 = add nsw i32 %1, 20 + %mul = mul nsw i32 %add1, %add + ret i32 %mul + } + + attributes #0 = { "target-cpu"="cortex-m4" } + +... +--- +name: test +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr, preferred-register: '' } + - { id: 1, class: gpr, preferred-register: '' } + - { id: 2, class: gprnopc, preferred-register: '' } + - { id: 3, class: rgpr, preferred-register: '' } + - { id: 4, class: gprnopc, preferred-register: '' } + - { id: 5, class: rgpr, preferred-register: '' } + - { id: 6, class: rgpr, preferred-register: '' } +liveins: + - { reg: '$r0', virtual-reg: '%0' } + - { reg: '$r1', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $r0, $r1 + + %1:gpr = COPY $r1 + %0:gpr = COPY $r0 + %2:gprnopc = t2LDRi12 %0, 0, 14, $noreg :: (load 4 from %ir.a) + %3:rgpr = nsw t2ADDri %2, 10, 14, $noreg, $noreg + %4:gprnopc = t2LDRi12 %1, 0, 14, $noreg :: (load 4 from %ir.b) + %5:rgpr = nsw t2ADDri %4, 20, 14, $noreg, $noreg + %6:rgpr = nsw t2MUL %5, %3, 14, $noreg + $r0 = COPY %6 + tBX_RET 14, $noreg, implicit $r0 + +... diff --git a/test/CodeGen/Thumb2/m4-sched-regs.ll b/test/CodeGen/Thumb2/m4-sched-regs.ll new file mode 100644 index 000000000000..29952feff070 --- /dev/null +++ b/test/CodeGen/Thumb2/m4-sched-regs.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - | FileCheck %s + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7em-arm-none-eabi" + +%struct.a = type { i32, %struct.b*, i8, i8, i8, i8, i8*, %struct.b*, i16, i16, i16, i16, i16, i16, i16, i16, i32, i32, i32, i32, i32, i32, i32 } +%struct.b = type { i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, [16 x i8], [64 x i8], [128 x i8], i32, [68 x i8] } + +define void @test(%struct.a* nocapture %dhcp, i16 zeroext %value) #0 { +; CHECK-LABEL: test: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: ldrh r2, [r0, #20] +; CHECK-NEXT: adds r3, r2, #1 +; CHECK-NEXT: strh r3, [r0, #20] +; CHECK-NEXT: ldr r3, [r0, #16] +; CHECK-NEXT: add r2, r3 +; CHECK-NEXT: lsrs r3, r1, #8 +; CHECK-NEXT: strb.w r3, [r2, #240] +; CHECK-NEXT: ldrh r2, [r0, #20] +; CHECK-NEXT: adds r3, r2, #1 +; CHECK-NEXT: strh r3, [r0, #20] +; CHECK-NEXT: ldr r0, [r0, #16] +; CHECK-NEXT: add r0, r2 +; CHECK-NEXT: strb.w r1, [r0, #240] +; CHECK-NEXT: bx lr +entry: + %shr = lshr i16 %value, 8 + %conv1 = trunc i16 %shr to i8 + %msg_out = getelementptr inbounds %struct.a, %struct.a* %dhcp, i32 0, i32 7 + %0 = load %struct.b*, %struct.b** %msg_out, align 4 + %options_out_len = getelementptr inbounds %struct.a, %struct.a* %dhcp, i32 0, i32 8 + %1 = load i16, i16* %options_out_len, align 4 + %inc = add i16 %1, 1 + store i16 %inc, i16* %options_out_len, align 4 + %idxprom = zext i16 %1 to i32 + %arrayidx = getelementptr inbounds %struct.b, %struct.b* %0, i32 0, i32 15, i32 %idxprom + store i8 %conv1, i8* %arrayidx, align 1 + %conv4 = trunc i16 %value to i8 + %2 
= load %struct.b*, %struct.b** %msg_out, align 4 + %3 = load i16, i16* %options_out_len, align 4 + %inc8 = add i16 %3, 1 + store i16 %inc8, i16* %options_out_len, align 4 + %idxprom9 = zext i16 %3 to i32 + %arrayidx10 = getelementptr inbounds %struct.b, %struct.b* %2, i32 0, i32 15, i32 %idxprom9 + store i8 %conv4, i8* %arrayidx10, align 1 + ret void +} + +attributes #0 = { minsize optsize "target-cpu"="cortex-m4" } diff --git a/test/MC/Mips/cfi-advance-loc.s b/test/MC/Mips/cfi-advance-loc.s index 4f5666ab33cd..407ad3faffe3 100644 --- a/test/MC/Mips/cfi-advance-loc.s +++ b/test/MC/Mips/cfi-advance-loc.s @@ -37,7 +37,7 @@ g: // CHECK-LE-NEXT: EntrySize: 0 // CHECK-LE-NEXT: SectionData ( // CHECK-LE-NEXT: 0000: 10000000 00000000 017A5200 017C1F01 -// CHECK-LE-NEXT: 0010: 0B0C1D00 14000000 18000000 00000000 +// CHECK-LE-NEXT: 0010: 0B0D1D00 14000000 18000000 00000000 // CHECK-LE-NEXT: 0020: 04010000 00030001 0E080000 14000000 // CHECK-LE-NEXT: 0030: 30000000 04010000 04000100 00040000 // CHECK-LE-NEXT: 0040: 01000E08 @@ -60,7 +60,7 @@ g: // CHECK-BE-NEXT: EntrySize: 0 // CHECK-BE-NEXT: SectionData ( // CHECK-BE-NEXT: 0000: 00000010 00000000 017A5200 017C1F01 -// CHECK-BE-NEXT: 0010: 0B0C1D00 00000014 00000018 00000000 +// CHECK-BE-NEXT: 0010: 0B0D1D00 00000014 00000018 00000000 // CHECK-BE-NEXT: 0020: 00000104 00030100 0E080000 00000014 // CHECK-BE-NEXT: 0030: 00000030 00000104 00010004 00040001 // CHECK-BE-NEXT: 0040: 00000E08 diff --git a/test/MC/Mips/cfi-encoding.s b/test/MC/Mips/cfi-encoding.s index fe0980768307..98a5ad1afe83 100644 --- a/test/MC/Mips/cfi-encoding.s +++ b/test/MC/Mips/cfi-encoding.s @@ -6,15 +6,15 @@ # RUN: | llvm-objdump -s -section=.eh_frame - | FileCheck --check-prefix=N64 %s # O32: 0000 00000010 00000000 017a5200 017c1f01 -# O32: 0010 0b0c1d00 00000010 00000018 00000000 +# O32: 0010 0b0d1d00 00000010 00000018 00000000 # O32: 0020 00000004 00000000 # N32: 0000 00000010 00000000 017a5200 017c1f01 -# N32: 0010 0b0c1d00 00000010 00000018 00000000 +# N32: 0010 0b0d1d00 00000010 00000018 00000000 # N32: 0020 00000004 00000000 # N64: 0000 00000010 00000000 017a5200 01781f01 -# N64: 0010 0c0c1d00 00000018 00000018 00000000 +# N64: 0010 0c0d1d00 00000018 00000018 00000000 # N64: 0020 00000000 00000000 00000004 00000000 foo: diff --git a/test/MC/Mips/eh-frame.s b/test/MC/Mips/eh-frame.s index e03027a20a4f..e901f44196d8 100644 --- a/test/MC/Mips/eh-frame.s +++ b/test/MC/Mips/eh-frame.s @@ -31,8 +31,7 @@ func: // DWARF32: Return address column: 31 // DWARF32: Augmentation data: 0B // ^^ fde pointer encoding: DW_EH_PE_sdata4 -// DWARF32: DW_CFA_def_cfa: reg29 +0 -// FIXME: The instructions are different from the ones produces by gas. +// DWARF32: DW_CFA_def_cfa_register: reg29 // // DWARF32: 00000014 00000010 00000018 FDE cie=00000018 pc=00000000...00000000 // DWARF32: DW_CFA_nop: @@ -49,8 +48,7 @@ func: // DWARF64: Return address column: 31 // DWARF64: Augmentation data: 0C // ^^ fde pointer encoding: DW_EH_PE_sdata8 -// DWARF64: DW_CFA_def_cfa: reg29 +0 -// FIXME: The instructions are different from the ones produces by gas. 
+// DWARF64: DW_CFA_def_cfa_register: reg29 // // DWARF64: 00000014 00000018 00000018 FDE cie=00000018 pc=00000000...00000000 // DWARF64: DW_CFA_nop: diff --git a/test/Transforms/InstCombine/trunc.ll b/test/Transforms/InstCombine/trunc.ll index 01d53ab98405..bff2fc3770fe 100644 --- a/test/Transforms/InstCombine/trunc.ll +++ b/test/Transforms/InstCombine/trunc.ll @@ -8,8 +8,8 @@ declare void @use(i32) define i64 @test1(i64 %a) { ; CHECK-LABEL: @test1( -; CHECK-NEXT: [[B:%.*]] = trunc i64 %a to i32 -; CHECK-NEXT: [[C:%.*]] = and i64 %a, 15 +; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32 +; CHECK-NEXT: [[C:%.*]] = and i64 [[A]], 15 ; CHECK-NEXT: call void @use(i32 [[B]]) ; CHECK-NEXT: ret i64 [[C]] ; @@ -22,8 +22,8 @@ define i64 @test1(i64 %a) { define i64 @test2(i64 %a) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[B:%.*]] = trunc i64 %a to i32 -; CHECK-NEXT: [[D1:%.*]] = shl i64 %a, 36 +; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32 +; CHECK-NEXT: [[D1:%.*]] = shl i64 [[A]], 36 ; CHECK-NEXT: [[D:%.*]] = ashr exact i64 [[D1]], 36 ; CHECK-NEXT: call void @use(i32 [[B]]) ; CHECK-NEXT: ret i64 [[D]] @@ -38,8 +38,8 @@ define i64 @test2(i64 %a) { define i64 @test3(i64 %a) { ; CHECK-LABEL: @test3( -; CHECK-NEXT: [[B:%.*]] = trunc i64 %a to i32 -; CHECK-NEXT: [[C:%.*]] = and i64 %a, 8 +; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32 +; CHECK-NEXT: [[C:%.*]] = and i64 [[A]], 8 ; CHECK-NEXT: call void @use(i32 [[B]]) ; CHECK-NEXT: ret i64 [[C]] ; @@ -52,8 +52,8 @@ define i64 @test3(i64 %a) { define i64 @test4(i64 %a) { ; CHECK-LABEL: @test4( -; CHECK-NEXT: [[B:%.*]] = trunc i64 %a to i32 -; CHECK-NEXT: [[C:%.*]] = and i64 %a, 8 +; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32 +; CHECK-NEXT: [[C:%.*]] = and i64 [[A]], 8 ; CHECK-NEXT: [[X:%.*]] = xor i64 [[C]], 8 ; CHECK-NEXT: call void @use(i32 [[B]]) ; CHECK-NEXT: ret i64 [[X]] @@ -68,8 +68,8 @@ define i64 @test4(i64 %a) { define i32 @test5(i32 %A) { ; CHECK-LABEL: @test5( -; CHECK-NEXT: [[C:%.*]] = lshr i32 %A, 16 -; CHECK-NEXT: ret i32 [[C]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[A:%.*]], 16 +; CHECK-NEXT: ret i32 [[TMP1]] ; %B = zext i32 %A to i128 %C = lshr i128 %B, 16 @@ -79,8 +79,8 @@ define i32 @test5(i32 %A) { define i32 @test6(i64 %A) { ; CHECK-LABEL: @test6( -; CHECK-NEXT: [[C:%.*]] = lshr i64 %A, 32 -; CHECK-NEXT: [[D:%.*]] = trunc i64 [[C]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A:%.*]], 32 +; CHECK-NEXT: [[D:%.*]] = trunc i64 [[TMP1]] to i32 ; CHECK-NEXT: ret i32 [[D]] ; %B = zext i64 %A to i128 @@ -93,11 +93,12 @@ define i32 @test6(i64 %A) { ; but does contain sign bits, where the sign bit is not known to be zero. 
define i16 @ashr_mul_sign_bits(i8 %X, i8 %Y) { ; CHECK-LABEL: @ashr_mul_sign_bits( -; CHECK-NEXT: [[A:%.*]] = sext i8 %X to i16 -; CHECK-NEXT: [[B:%.*]] = sext i8 %Y to i16 +; CHECK-NEXT: [[A:%.*]] = sext i8 [[X:%.*]] to i16 +; CHECK-NEXT: [[B:%.*]] = sext i8 [[Y:%.*]] to i16 ; CHECK-NEXT: [[C:%.*]] = mul nsw i16 [[A]], [[B]] ; CHECK-NEXT: [[D:%.*]] = ashr i16 [[C]], 3 ; CHECK-NEXT: ret i16 [[D]] +; %A = sext i8 %X to i32 %B = sext i8 %Y to i32 %C = mul i32 %A, %B @@ -108,11 +109,12 @@ define i16 @ashr_mul_sign_bits(i8 %X, i8 %Y) { define i16 @ashr_mul(i8 %X, i8 %Y) { ; CHECK-LABEL: @ashr_mul( -; CHECK-NEXT: [[A:%.*]] = sext i8 %X to i16 -; CHECK-NEXT: [[B:%.*]] = sext i8 %Y to i16 +; CHECK-NEXT: [[A:%.*]] = sext i8 [[X:%.*]] to i16 +; CHECK-NEXT: [[B:%.*]] = sext i8 [[Y:%.*]] to i16 ; CHECK-NEXT: [[C:%.*]] = mul nsw i16 [[A]], [[B]] ; CHECK-NEXT: [[D:%.*]] = ashr i16 [[C]], 8 ; CHECK-NEXT: ret i16 [[D]] +; %A = sext i8 %X to i20 %B = sext i8 %Y to i20 %C = mul i20 %A, %B @@ -149,7 +151,7 @@ define <2 x i32> @trunc_ashr_vec(<2 x i32> %X) { define i92 @test7(i64 %A) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 %A, 32 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A:%.*]], 32 ; CHECK-NEXT: [[D:%.*]] = zext i64 [[TMP1]] to i92 ; CHECK-NEXT: ret i92 [[D]] ; @@ -161,8 +163,8 @@ define i92 @test7(i64 %A) { define i64 @test8(i32 %A, i32 %B) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[TMP38:%.*]] = zext i32 %A to i64 -; CHECK-NEXT: [[TMP32:%.*]] = zext i32 %B to i64 +; CHECK-NEXT: [[TMP38:%.*]] = zext i32 [[A:%.*]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = zext i32 [[B:%.*]] to i64 ; CHECK-NEXT: [[TMP33:%.*]] = shl nuw i64 [[TMP32]], 32 ; CHECK-NEXT: [[INS35:%.*]] = or i64 [[TMP33]], [[TMP38]] ; CHECK-NEXT: ret i64 [[INS35]] @@ -177,7 +179,7 @@ define i64 @test8(i32 %A, i32 %B) { define i8 @test9(i32 %X) { ; CHECK-LABEL: @test9( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %X to i8 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8 ; CHECK-NEXT: [[Z:%.*]] = and i8 [[TMP1]], 42 ; CHECK-NEXT: ret i8 [[Z]] ; @@ -189,7 +191,7 @@ define i8 @test9(i32 %X) { ; rdar://8808586 define i8 @test10(i32 %X) { ; CHECK-LABEL: @test10( -; CHECK-NEXT: [[Y:%.*]] = trunc i32 %X to i8 +; CHECK-NEXT: [[Y:%.*]] = trunc i32 [[X:%.*]] to i8 ; CHECK-NEXT: [[Z:%.*]] = and i8 [[Y]], 42 ; CHECK-NEXT: ret i8 [[Z]] ; @@ -204,7 +206,7 @@ define i8 @test10(i32 %X) { define i32 @trunc_bitcast1(<4 x i32> %v) { ; CHECK-LABEL: @trunc_bitcast1( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> %v, i32 1 +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> [[V:%.*]], i32 1 ; CHECK-NEXT: ret i32 [[EXT]] ; %bc = bitcast <4 x i32> %v to i128 @@ -217,7 +219,7 @@ define i32 @trunc_bitcast1(<4 x i32> %v) { define i32 @trunc_bitcast2(<2 x i64> %v) { ; CHECK-LABEL: @trunc_bitcast2( -; CHECK-NEXT: [[BC1:%.*]] = bitcast <2 x i64> %v to <4 x i32> +; CHECK-NEXT: [[BC1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> [[BC1]], i32 2 ; CHECK-NEXT: ret i32 [[EXT]] ; @@ -231,7 +233,7 @@ define i32 @trunc_bitcast2(<2 x i64> %v) { define i32 @trunc_bitcast3(<4 x i32> %v) { ; CHECK-LABEL: @trunc_bitcast3( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> %v, i32 0 +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> [[V:%.*]], i32 0 ; CHECK-NEXT: ret i32 [[EXT]] ; %bc = bitcast <4 x i32> %v to i128 @@ -241,7 +243,7 @@ define i32 @trunc_bitcast3(<4 x i32> %v) { define i32 @trunc_shl_31_i32_i64(i64 %val) { ; CHECK-LABEL: @trunc_shl_31_i32_i64( -; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 
%val to i32 +; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 [[VAL:%.*]] to i32 ; CHECK-NEXT: [[TRUNC:%.*]] = shl i32 [[VAL_TR]], 31 ; CHECK-NEXT: ret i32 [[TRUNC]] ; @@ -252,7 +254,7 @@ define i32 @trunc_shl_31_i32_i64(i64 %val) { define i32 @trunc_shl_nsw_31_i32_i64(i64 %val) { ; CHECK-LABEL: @trunc_shl_nsw_31_i32_i64( -; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 %val to i32 +; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 [[VAL:%.*]] to i32 ; CHECK-NEXT: [[TRUNC:%.*]] = shl i32 [[VAL_TR]], 31 ; CHECK-NEXT: ret i32 [[TRUNC]] ; @@ -263,7 +265,7 @@ define i32 @trunc_shl_nsw_31_i32_i64(i64 %val) { define i32 @trunc_shl_nuw_31_i32_i64(i64 %val) { ; CHECK-LABEL: @trunc_shl_nuw_31_i32_i64( -; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 %val to i32 +; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 [[VAL:%.*]] to i32 ; CHECK-NEXT: [[TRUNC:%.*]] = shl i32 [[VAL_TR]], 31 ; CHECK-NEXT: ret i32 [[TRUNC]] ; @@ -274,7 +276,7 @@ define i32 @trunc_shl_nuw_31_i32_i64(i64 %val) { define i32 @trunc_shl_nsw_nuw_31_i32_i64(i64 %val) { ; CHECK-LABEL: @trunc_shl_nsw_nuw_31_i32_i64( -; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 %val to i32 +; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 [[VAL:%.*]] to i32 ; CHECK-NEXT: [[TRUNC:%.*]] = shl i32 [[VAL_TR]], 31 ; CHECK-NEXT: ret i32 [[TRUNC]] ; @@ -285,7 +287,7 @@ define i32 @trunc_shl_nsw_nuw_31_i32_i64(i64 %val) { define i16 @trunc_shl_15_i16_i64(i64 %val) { ; CHECK-LABEL: @trunc_shl_15_i16_i64( -; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 %val to i16 +; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 [[VAL:%.*]] to i16 ; CHECK-NEXT: [[TRUNC:%.*]] = shl i16 [[VAL_TR]], 15 ; CHECK-NEXT: ret i16 [[TRUNC]] ; @@ -296,7 +298,7 @@ define i16 @trunc_shl_15_i16_i64(i64 %val) { define i16 @trunc_shl_15_i16_i32(i32 %val) { ; CHECK-LABEL: @trunc_shl_15_i16_i32( -; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i32 %val to i16 +; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i32 [[VAL:%.*]] to i16 ; CHECK-NEXT: [[TRUNC:%.*]] = shl i16 [[VAL_TR]], 15 ; CHECK-NEXT: ret i16 [[TRUNC]] ; @@ -307,7 +309,7 @@ define i16 @trunc_shl_15_i16_i32(i32 %val) { define i8 @trunc_shl_7_i8_i64(i64 %val) { ; CHECK-LABEL: @trunc_shl_7_i8_i64( -; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 %val to i8 +; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 [[VAL:%.*]] to i8 ; CHECK-NEXT: [[TRUNC:%.*]] = shl i8 [[VAL_TR]], 7 ; CHECK-NEXT: ret i8 [[TRUNC]] ; @@ -318,7 +320,7 @@ define i8 @trunc_shl_7_i8_i64(i64 %val) { define i2 @trunc_shl_1_i2_i64(i64 %val) { ; CHECK-LABEL: @trunc_shl_1_i2_i64( -; CHECK-NEXT: [[SHL:%.*]] = shl i64 %val, 1 +; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[VAL:%.*]], 1 ; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[SHL]] to i2 ; CHECK-NEXT: ret i2 [[TRUNC]] ; @@ -329,7 +331,7 @@ define i2 @trunc_shl_1_i2_i64(i64 %val) { define i32 @trunc_shl_1_i32_i64(i64 %val) { ; CHECK-LABEL: @trunc_shl_1_i32_i64( -; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 %val to i32 +; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 [[VAL:%.*]] to i32 ; CHECK-NEXT: [[TRUNC:%.*]] = shl i32 [[VAL_TR]], 1 ; CHECK-NEXT: ret i32 [[TRUNC]] ; @@ -340,7 +342,7 @@ define i32 @trunc_shl_1_i32_i64(i64 %val) { define i32 @trunc_shl_16_i32_i64(i64 %val) { ; CHECK-LABEL: @trunc_shl_16_i32_i64( -; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 %val to i32 +; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 [[VAL:%.*]] to i32 ; CHECK-NEXT: [[TRUNC:%.*]] = shl i32 [[VAL_TR]], 16 ; CHECK-NEXT: ret i32 [[TRUNC]] ; @@ -370,7 +372,7 @@ define i32 @trunc_shl_32_i32_i64(i64 %val) { ; TODO: Should be able to handle vectors define <2 x i32> @trunc_shl_16_v2i32_v2i64(<2 x i64> %val) { ; CHECK-LABEL: @trunc_shl_16_v2i32_v2i64( -; CHECK-NEXT: [[SHL:%.*]] = 
shl <2 x i64> %val, <i64 16, i64 16>
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i64> [[VAL:%.*]], <i64 16, i64 16>
; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i64> [[SHL]] to <2 x i32>
; CHECK-NEXT: ret <2 x i32> [[TRUNC]]
;
@@ -381,7 +383,7 @@ define <2 x i32> @trunc_shl_nosplat_v2i32_v2i64(<2 x i64> %val) {
; CHECK-LABEL: @trunc_shl_nosplat_v2i32_v2i64(
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i64> %val,
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i64> [[VAL:%.*]],
; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i64> [[SHL]] to <2 x i32>
; CHECK-NEXT: ret <2 x i32> [[TRUNC]]
;
@@ -392,10 +394,10 @@ define void @trunc_shl_31_i32_i64_multi_use(i64 %val, i32 addrspace(1)* %ptr0, i64 addrspace(1)* %ptr1) {
; CHECK-LABEL: @trunc_shl_31_i32_i64_multi_use(
-; CHECK-NEXT: [[SHL:%.*]] = shl i64 %val, 31
+; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[VAL:%.*]], 31
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[SHL]] to i32
-; CHECK-NEXT: store volatile i32 [[TRUNC]], i32 addrspace(1)* %ptr0, align 4
-; CHECK-NEXT: store volatile i64 [[SHL]], i64 addrspace(1)* %ptr1, align 8
+; CHECK-NEXT: store volatile i32 [[TRUNC]], i32 addrspace(1)* [[PTR0:%.*]], align 4
+; CHECK-NEXT: store volatile i64 [[SHL]], i64 addrspace(1)* [[PTR1:%.*]], align 8
; CHECK-NEXT: ret void
;
%shl = shl i64 %val, 31
@@ -407,7 +409,7 @@ define i32 @trunc_shl_lshr_infloop(i64 %arg) {
; CHECK-LABEL: @trunc_shl_lshr_infloop(
-; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 %arg, 1
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[ARG:%.*]], 1
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; CHECK-NEXT: ret i32 [[TMP2]]
@@ -420,7 +422,7 @@ define i32 @trunc_shl_ashr_infloop(i64 %arg) {
; CHECK-LABEL: @trunc_shl_ashr_infloop(
-; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 %arg, 3
+; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 [[ARG:%.*]], 3
; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; CHECK-NEXT: ret i32 [[TMP2]]
@@ -433,7 +435,7 @@ define i32 @trunc_shl_shl_infloop(i64 %arg) {
; CHECK-LABEL: @trunc_shl_shl_infloop(
-; CHECK-NEXT: [[ARG_TR:%.*]] = trunc i64 %arg to i32
+; CHECK-NEXT: [[ARG_TR:%.*]] = trunc i64 [[ARG:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[ARG_TR]], 3
; CHECK-NEXT: ret i32 [[TMP2]]
;
@@ -445,7 +447,7 @@ define i32 @trunc_shl_lshr_var(i64 %arg, i64 %val) {
; CHECK-LABEL: @trunc_shl_lshr_var(
-; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 %arg, %val
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[ARG:%.*]], [[VAL:%.*]]
; CHECK-NEXT: [[TMP0_TR:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0_TR]], 2
; CHECK-NEXT: ret i32 [[TMP2]]
@@ -458,7 +460,7 @@ define i32 @trunc_shl_ashr_var(i64 %arg, i64 %val) {
; CHECK-LABEL: @trunc_shl_ashr_var(
-; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 %arg, %val
+; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 [[ARG:%.*]], [[VAL:%.*]]
; CHECK-NEXT: [[TMP0_TR:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0_TR]], 2
; CHECK-NEXT: ret i32 [[TMP2]]
@@ -471,7 +473,7 @@ define i32 @trunc_shl_shl_var(i64 %arg, i64 %val) {
; CHECK-LABEL: @trunc_shl_shl_var(
-; CHECK-NEXT: [[TMP0:%.*]] = shl i64 %arg, %val
+; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[ARG:%.*]], [[VAL:%.*]]
; CHECK-NEXT: [[TMP0_TR:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0_TR]], 2
; CHECK-NEXT: ret i32 [[TMP2]]
@@ -484,7 +486,7 @@ define i32 @trunc_shl_shl_var(i64 %arg, i64 %val) {
define <8 x i16> @trunc_shl_v8i15_v8i32_15(<8 x i32> %a) {
; CHECK-LABEL: @trunc_shl_v8i15_v8i32_15(
-; CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> %a, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> [[A:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: [[CONV:%.*]] = trunc <8 x i32> [[SHL]] to <8 x i16>
; CHECK-NEXT: ret <8 x i16> [[CONV]]
;
@@ -513,7 +515,7 @@ define <8 x i16> @trunc_shl_v8i16_v8i32_17(<8 x i32> %a) {
define <8 x i16> @trunc_shl_v8i16_v8i32_4(<8 x i32> %a) {
; CHECK-LABEL: @trunc_shl_v8i16_v8i32_4(
-; CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> [[A:%.*]], <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[CONV:%.*]] = trunc <8 x i32> [[SHL]] to <8 x i16>
; CHECK-NEXT: ret <8 x i16> [[CONV]]
;
@@ -527,7 +529,7 @@ define <8 x i16> @trunc_shl_v8i16_v8i32_4(<8 x i32> %a) {
define <4 x i8> @wide_shuf(<4 x i32> %x) {
; CHECK-LABEL: @wide_shuf(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> %x, <4 x i32> , <4 x i32>
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> , <4 x i32>
; CHECK-NEXT: [[TRUNC:%.*]] = trunc <4 x i32> [[SHUF]] to <4 x i8>
; CHECK-NEXT: ret <4 x i8> [[TRUNC]]
;
@@ -540,7 +542,7 @@ define <4 x i8> @wide_splat1(<4 x i32> %x) {
; CHECK-LABEL: @wide_splat1(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> %x to <4 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[X:%.*]] to <4 x i8>
; CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> undef, <4 x i32>
; CHECK-NEXT: ret <4 x i8> [[TRUNC]]
;
@@ -554,7 +556,7 @@ define <3 x i31> @wide_splat2(<3 x i33> %x) {
; CHECK-LABEL: @wide_splat2(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <3 x i33> %x to <3 x i31>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <3 x i33> [[X:%.*]] to <3 x i31>
; CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <3 x i31> [[TMP1]], <3 x i31> undef, <3 x i32>
; CHECK-NEXT: ret <3 x i31> [[TRUNC]]
;
@@ -569,7 +571,7 @@ define <3 x i31> @wide_splat3(<3 x i33> %x) {
; CHECK-LABEL: @wide_splat3(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x i33> %x, <3 x i33> undef, <3 x i32>
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x i33> [[X:%.*]], <3 x i33> undef, <3 x i32>
; CHECK-NEXT: [[TRUNC:%.*]] = trunc <3 x i33> [[SHUF]] to <3 x i31>
; CHECK-NEXT: ret <3 x i31> [[TRUNC]]
;
@@ -582,7 +584,7 @@ define <8 x i8> @wide_lengthening_splat(<4 x i16> %v) {
; CHECK-LABEL: @wide_lengthening_splat(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i16> [[V:%.*]], <4 x i16> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TR:%.*]] = trunc <8 x i16> [[SHUF]] to <8 x i8>
; CHECK-NEXT: ret <8 x i8> [[TR]]
;
@@ -593,7 +595,7 @@ define <2 x i8> @narrow_add_vec_constant(<2 x i32> %x) {
; CHECK-LABEL: @narrow_add_vec_constant(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> %x to <2 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8>
; CHECK-NEXT: [[TR:%.*]] = add <2 x i8> [[TMP1]],
; CHECK-NEXT: ret <2 x i8> [[TR]]
;
@@ -604,7 +606,7 @@ define <2 x i8> @narrow_add_vec_constant(<2 x i32> %x) {
define <2 x i8> @narrow_mul_vec_constant(<2 x i32> %x) {
; CHECK-LABEL: @narrow_mul_vec_constant(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> %x to <2 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8>
; CHECK-NEXT: [[TR:%.*]] = mul <2 x i8> [[TMP1]],
; CHECK-NEXT: ret <2 x i8> [[TR]]
;
@@ -615,7 +617,7 @@ define <2 x i8> @narrow_mul_vec_constant(<2 x i32> %x) {
define <2 x i8> @narrow_sub_vec_constant(<2 x i32> %x) {
; CHECK-LABEL: @narrow_sub_vec_constant(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> %x to <2 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8>
; CHECK-NEXT: [[TR:%.*]] = sub <2 x i8> , [[TMP1]]
; CHECK-NEXT: ret <2 x i8> [[TR]]
;
diff --git a/test/Transforms/InstSimplify/fast-math.ll b/test/Transforms/InstSimplify/fast-math.ll
index 08fb6112e57a..5f981ed125ed 100644
--- a/test/Transforms/InstSimplify/fast-math.ll
+++ b/test/Transforms/InstSimplify/fast-math.ll
@@ -56,8 +56,8 @@ define float @no_mul_zero_3(float %a) {
; -X + X --> 0.0 (with nnan on the fadd)
-define float @fadd_fnegx(float %x) {
-; CHECK-LABEL: @fadd_fnegx(
+define float @fadd_binary_fnegx(float %x) {
+; CHECK-LABEL: @fadd_binary_fnegx(
; CHECK-NEXT: ret float 0.000000e+00
;
%negx = fsub float -0.0, %x
@@ -65,10 +65,19 @@ define float @fadd_fnegx(float %x) {
ret float %r
}
+define float @fadd_unary_fnegx(float %x) {
+; CHECK-LABEL: @fadd_unary_fnegx(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %negx = fneg float %x
+ %r = fadd nnan float %negx, %x
+ ret float %r
+}
+
; X + -X --> 0.0 (with nnan on the fadd)
-define <2 x float> @fadd_fnegx_commute_vec(<2 x float> %x) {
-; CHECK-LABEL: @fadd_fnegx_commute_vec(
+define <2 x float> @fadd_binary_fnegx_commute_vec(<2 x float> %x) {
+; CHECK-LABEL: @fadd_binary_fnegx_commute_vec(
; CHECK-NEXT: ret <2 x float> zeroinitializer
;
%negx = fsub <2 x float> <float -0.0, float -0.0>, %x
@@ -76,6 +85,15 @@ define <2 x float> @fadd_fnegx_commute_vec(<2 x float> %x) {
ret <2 x float> %r
}
+define <2 x float> @fadd_unary_fnegx_commute_vec(<2 x float> %x) {
+; CHECK-LABEL: @fadd_unary_fnegx_commute_vec(
+; CHECK-NEXT: ret <2 x float> zeroinitializer
+;
+ %negx = fneg <2 x float> %x
+ %r = fadd nnan <2 x float> %x, %negx
+ ret <2 x float> %r
+}
+
define <2 x float> @fadd_fnegx_commute_vec_undef(<2 x float> %x) {
; CHECK-LABEL: @fadd_fnegx_commute_vec_undef(
; CHECK-NEXT: ret <2 x float> zeroinitializer
diff --git a/test/Transforms/JumpThreading/stale-loop-info-after-unfold-select.ll b/test/Transforms/JumpThreading/stale-loop-info-after-unfold-select.ll
new file mode 100644
index 000000000000..7dbc794c3639
--- /dev/null
+++ b/test/Transforms/JumpThreading/stale-loop-info-after-unfold-select.ll
@@ -0,0 +1,30 @@
+; RUN: opt -passes='require<loops>,jump-threading,verify<loops>' -S < %s
+
+%"type1" = type { i8 }
+%"type2" = type opaque
+
+define dso_local i16* @func2(%"type1"* %this, %"type2"*) {
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %func1.exit, %entry
+ %month.0 = phi i32 [ undef, %entry ], [ %month.0.be, %func1.exit ]
+ switch i32 %month.0, label %if.end.i [
+ i32 4, label %func1.exit
+ i32 1, label %func1.exit
+ ]
+
+if.end.i: ; preds = %while.cond
+ br label %func1.exit
+
+func1.exit: ; preds = %if.end.i, %while.cond, %while.cond
+ %retval.0.i = phi i32 [ 9, %if.end.i ], [ 0, %while.cond ], [ 0, %while.cond ]
+ %call2 = tail call signext i32 @func3(i32 signext %retval.0.i, i32 signext 1, i32 signext 3)
+ %cmp = icmp slt i32 %call2, 1
+ %add = add nsw i32 %call2, 2
+ %month.0.be = select i1 %cmp, i32 %add, i32 %call2
+ br label %while.cond
+}
+
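+; Unfolding the select that feeds %month.0.be splits a block inside the loop
+; formed by %while.cond, so jump threading must update the preserved loop
+; info for the newly created block; the require/verify passes in the RUN
+; line are there to catch the analysis going stale.
+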
+declare i32 @func3(i32, i32, i32) + diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp index 9c8f3b1d43a9..b0f13d84af4e 100644 --- a/unittests/ADT/TripleTest.cpp +++ b/unittests/ADT/TripleTest.cpp @@ -552,6 +552,13 @@ TEST(TripleTest, ParsedIDs) { EXPECT_EQ(Triple::OpenEmbedded, T.getVendor()); EXPECT_EQ(Triple::Linux, T.getOS()); EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment()); + EXPECT_TRUE(T.isArch64Bit()); + + T = Triple("arm64_32-apple-ios"); + EXPECT_EQ(Triple::aarch64_32, T.getArch()); + EXPECT_EQ(Triple::IOS, T.getOS()); + EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment()); + EXPECT_TRUE(T.isArch32Bit()); T = Triple("huh"); EXPECT_EQ(Triple::UnknownArch, T.getArch()); @@ -1457,6 +1464,10 @@ TEST(TripleTest, ParseARMArch) { Triple T = Triple("arm64"); EXPECT_EQ(Triple::aarch64, T.getArch()); } + { + Triple T = Triple("arm64_32"); + EXPECT_EQ(Triple::aarch64_32, T.getArch()); + } { Triple T = Triple("aarch64"); EXPECT_EQ(Triple::aarch64, T.getArch()); diff --git a/unittests/Support/TargetParserTest.cpp b/unittests/Support/TargetParserTest.cpp index a973d980d265..08ed7645ea0e 100644 --- a/unittests/Support/TargetParserTest.cpp +++ b/unittests/Support/TargetParserTest.cpp @@ -658,12 +658,15 @@ TEST(TargetParserTest, ARMparseArchEndianAndISA) { } EXPECT_EQ(ARM::EndianKind::LITTLE, ARM::parseArchEndian("aarch64")); + EXPECT_EQ(ARM::EndianKind::LITTLE, ARM::parseArchEndian("arm64_32")); EXPECT_EQ(ARM::EndianKind::BIG, ARM::parseArchEndian("aarch64_be")); EXPECT_EQ(ARM::ISAKind::AARCH64, ARM::parseArchISA("aarch64")); EXPECT_EQ(ARM::ISAKind::AARCH64, ARM::parseArchISA("aarch64_be")); EXPECT_EQ(ARM::ISAKind::AARCH64, ARM::parseArchISA("arm64")); EXPECT_EQ(ARM::ISAKind::AARCH64, ARM::parseArchISA("arm64_be")); + EXPECT_EQ(ARM::ISAKind::AARCH64, ARM::parseArchISA("arm64_32")); + EXPECT_EQ(ARM::ISAKind::AARCH64, ARM::parseArchISA("aarch64_32")); } TEST(TargetParserTest, ARMparseArchProfile) { diff --git a/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn index 7c6d28025a80..1cfee43e7005 100644 --- a/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn +++ b/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn @@ -60,15 +60,15 @@ static_library("LLVMHexagonCodeGen") { "HexagonGenPredicate.cpp", "HexagonHardwareLoops.cpp", "HexagonHazardRecognizer.cpp", - "HexagonInstrInfo.cpp", "HexagonISelDAGToDAG.cpp", "HexagonISelDAGToDAGHVX.cpp", "HexagonISelLowering.cpp", "HexagonISelLoweringHVX.cpp", + "HexagonInstrInfo.cpp", "HexagonLoopIdiomRecognition.cpp", + "HexagonMCInstLower.cpp", "HexagonMachineFunctionInfo.cpp", "HexagonMachineScheduler.cpp", - "HexagonMCInstLower.cpp", "HexagonNewValueJump.cpp", "HexagonOptAddrMode.cpp", "HexagonOptimizeSZextends.cpp", @@ -83,10 +83,10 @@ static_library("LLVMHexagonCodeGen") { "HexagonTargetMachine.cpp", "HexagonTargetObjectFile.cpp", "HexagonTargetTransformInfo.cpp", - "HexagonVectorLoopCarriedReuse.cpp", - "HexagonVectorPrint.cpp", "HexagonVExtract.cpp", "HexagonVLIWPacketizer.cpp", + "HexagonVectorLoopCarriedReuse.cpp", + "HexagonVectorPrint.cpp", "RDFCopy.cpp", "RDFDeadCode.cpp", "RDFGraph.cpp", diff --git a/utils/gn/secondary/llvm/lib/Target/Lanai/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/Lanai/BUILD.gn index 6600769cb745..85903af80f6a 100644 --- a/utils/gn/secondary/llvm/lib/Target/Lanai/BUILD.gn +++ b/utils/gn/secondary/llvm/lib/Target/Lanai/BUILD.gn @@ -33,11 +33,11 @@ static_library("LLVMLanaiCodeGen") { "LanaiAsmPrinter.cpp", "LanaiDelaySlotFiller.cpp", 
"LanaiFrameLowering.cpp", - "LanaiInstrInfo.cpp", "LanaiISelDAGToDAG.cpp", "LanaiISelLowering.cpp", - "LanaiMachineFunctionInfo.cpp", + "LanaiInstrInfo.cpp", "LanaiMCInstLower.cpp", + "LanaiMachineFunctionInfo.cpp", "LanaiMemAluCombiner.cpp", "LanaiRegisterInfo.cpp", "LanaiSelectionDAGInfo.cpp", diff --git a/utils/gn/secondary/llvm/lib/Target/Sparc/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/Sparc/BUILD.gn index de51b1006b33..89242e0bae26 100644 --- a/utils/gn/secondary/llvm/lib/Target/Sparc/BUILD.gn +++ b/utils/gn/secondary/llvm/lib/Target/Sparc/BUILD.gn @@ -32,15 +32,15 @@ static_library("LLVMSparcCodeGen") { "DelaySlotFiller.cpp", "LeonPasses.cpp", "SparcAsmPrinter.cpp", - "SparcInstrInfo.cpp", + "SparcFrameLowering.cpp", "SparcISelDAGToDAG.cpp", "SparcISelLowering.cpp", - "SparcFrameLowering.cpp", + "SparcInstrInfo.cpp", + "SparcMCInstLower.cpp", "SparcMachineFunctionInfo.cpp", "SparcRegisterInfo.cpp", "SparcSubtarget.cpp", "SparcTargetMachine.cpp", - "SparcMCInstLower.cpp", "SparcTargetObjectFile.cpp", ] } diff --git a/utils/gn/secondary/llvm/lib/Target/Sparc/MCTargetDesc/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/Sparc/MCTargetDesc/BUILD.gn index 27e5a778a6b5..15e9057fd0c1 100644 --- a/utils/gn/secondary/llvm/lib/Target/Sparc/MCTargetDesc/BUILD.gn +++ b/utils/gn/secondary/llvm/lib/Target/Sparc/MCTargetDesc/BUILD.gn @@ -63,8 +63,8 @@ static_library("MCTargetDesc") { "SparcInstPrinter.cpp", "SparcMCAsmInfo.cpp", "SparcMCCodeEmitter.cpp", - "SparcMCTargetDesc.cpp", "SparcMCExpr.cpp", + "SparcMCTargetDesc.cpp", "SparcTargetStreamer.cpp", ] } diff --git a/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn b/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn index fb9b2ab82d05..db550dbe790e 100644 --- a/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn +++ b/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn @@ -20,6 +20,7 @@ unittest("AnalysisTests") { "DivergenceAnalysisTest.cpp", "DomTreeUpdaterTest.cpp", "GlobalsModRefTest.cpp", + "IVDescriptorsTest.cpp", "LazyCallGraphTest.cpp", "LoopInfoTest.cpp", "MemoryBuiltinsTest.cpp",