Skip to content

Commit 0af3e6a

Browse files
committed
[InstCombine] Sink instructions with multiple users in a successor block.
This patch tries to sink an instruction when it is only used in a single successor block. It is a further enhancement based on Anna's commit D109700, which allows sinking an instruction that has multiple uses in a single user. With this patch, sinking an instruction that has multiple users in a single successor block is also supported. It could fix a known issue from Rust: rust-lang/rust#51346 (comment). Reviewed By: nikic, reames. Differential Revision: https://reviews.llvm.org/D121585
1 parent c236b41 commit 0af3e6a

17 files changed

+205
-181
lines changed

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

+57-34
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,10 @@ static cl::opt<bool>
138138
EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
139139
cl::init(true));
140140

141+
static cl::opt<unsigned> MaxSinkNumUsers(
142+
"instcombine-max-sink-users", cl::init(32),
143+
cl::desc("Maximum number of undroppable users for instruction sinking"));
144+
141145
static cl::opt<unsigned> LimitMaxIterations(
142146
"instcombine-max-iterations",
143147
cl::desc("Limit the maximum number of instruction combining iterations"),
@@ -3859,7 +3863,6 @@ static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
38593863
/// block.
38603864
static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
38613865
TargetLibraryInfo &TLI) {
3862-
assert(I->getUniqueUndroppableUser() && "Invariants didn't hold!");
38633866
BasicBlock *SrcBlock = I->getParent();
38643867

38653868
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
@@ -4026,48 +4029,68 @@ bool InstCombinerImpl::run() {
40264029
[this](Instruction *I) -> Optional<BasicBlock *> {
40274030
if (!EnableCodeSinking)
40284031
return None;
4029-
auto *UserInst = cast_or_null<Instruction>(I->getUniqueUndroppableUser());
4030-
if (!UserInst)
4031-
return None;
40324032

40334033
BasicBlock *BB = I->getParent();
40344034
BasicBlock *UserParent = nullptr;
4035+
unsigned NumUsers = 0;
40354036

4036-
// Special handling for Phi nodes - get the block the use occurs in.
4037-
if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
4038-
for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
4039-
if (PN->getIncomingValue(i) == I) {
4040-
// Bail out if we have uses in different blocks. We don't do any
4041-
// sophisticated analysis (i.e finding NearestCommonDominator of these
4042-
// use blocks).
4043-
if (UserParent && UserParent != PN->getIncomingBlock(i))
4044-
return None;
4045-
UserParent = PN->getIncomingBlock(i);
4037+
for (auto *U : I->users()) {
4038+
if (U->isDroppable())
4039+
continue;
4040+
if (NumUsers > MaxSinkNumUsers)
4041+
return None;
4042+
4043+
Instruction *UserInst = cast<Instruction>(U);
4044+
// Special handling for Phi nodes - get the block the use occurs in.
4045+
if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
4046+
for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
4047+
if (PN->getIncomingValue(i) == I) {
4048+
// Bail out if we have uses in different blocks. We don't do any
4049+
// sophisticated analysis (i.e finding NearestCommonDominator of
4050+
// these use blocks).
4051+
if (UserParent && UserParent != PN->getIncomingBlock(i))
4052+
return None;
4053+
UserParent = PN->getIncomingBlock(i);
4054+
}
40464055
}
4056+
assert(UserParent && "expected to find user block!");
4057+
} else {
4058+
if (UserParent && UserParent != UserInst->getParent())
4059+
return None;
4060+
UserParent = UserInst->getParent();
40474061
}
4048-
assert(UserParent && "expected to find user block!");
4049-
} else
4050-
UserParent = UserInst->getParent();
40514062

4052-
// Try sinking to another block. If that block is unreachable, then do
4053-
// not bother. SimplifyCFG should handle it.
4054-
if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
4055-
return None;
4063+
// Make sure these checks are done only once, naturally we do the checks
4064+
// the first time we get the userparent, this will save compile time.
4065+
if (NumUsers == 0) {
4066+
// Try sinking to another block. If that block is unreachable, then do
4067+
// not bother. SimplifyCFG should handle it.
4068+
if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
4069+
return None;
4070+
4071+
auto *Term = UserParent->getTerminator();
4072+
// See if the user is one of our successors that has only one
4073+
// predecessor, so that we don't have to split the critical edge.
4074+
// Another option where we can sink is a block that ends with a
4075+
// terminator that does not pass control to other block (such as
4076+
// return or unreachable or resume). In this case:
4077+
// - I dominates the User (by SSA form);
4078+
// - the User will be executed at most once.
4079+
// So sinking I down to User is always profitable or neutral.
4080+
if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
4081+
return None;
4082+
4083+
assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
4084+
}
40564085

4057-
auto *Term = UserParent->getTerminator();
4058-
// See if the user is one of our successors that has only one
4059-
// predecessor, so that we don't have to split the critical edge.
4060-
// Another option where we can sink is a block that ends with a
4061-
// terminator that does not pass control to other block (such as
4062-
// return or unreachable or resume). In this case:
4063-
// - I dominates the User (by SSA form);
4064-
// - the User will be executed at most once.
4065-
// So sinking I down to User is always profitable or neutral.
4066-
if (UserParent->getUniquePredecessor() == BB || succ_empty(Term)) {
4067-
assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
4068-
return UserParent;
4086+
NumUsers++;
40694087
}
4070-
return None;
4088+
4089+
// No user or only has droppable users.
4090+
if (!UserParent)
4091+
return None;
4092+
4093+
return UserParent;
40714094
};
40724095

40734096
auto OptBB = getOptionalSinkBlockForInst(I);

llvm/test/Transforms/InstCombine/intptr7.ll

+21-19
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,17 @@
44
define void @matching_phi(i64 %a, float* %b, i1 %cond) {
55
; CHECK-LABEL: @matching_phi(
66
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB2:%.*]], label [[BB1:%.*]]
8+
; CHECK: bb1:
9+
; CHECK-NEXT: [[ADDB:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 2
10+
; CHECK-NEXT: br label [[BB3:%.*]]
11+
; CHECK: bb2:
712
; CHECK-NEXT: [[ADD_INT:%.*]] = add i64 [[A:%.*]], 1
813
; CHECK-NEXT: [[ADD:%.*]] = inttoptr i64 [[ADD_INT]] to float*
9-
; CHECK-NEXT: br i1 [[COND:%.*]], label [[BBB:%.*]], label [[A:%.*]]
10-
; CHECK: A:
11-
; CHECK-NEXT: [[ADDB:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 2
12-
; CHECK-NEXT: br label [[C:%.*]]
13-
; CHECK: Bbb:
1414
; CHECK-NEXT: store float 1.000000e+01, float* [[ADD]], align 4
15-
; CHECK-NEXT: br label [[C]]
16-
; CHECK: C:
17-
; CHECK-NEXT: [[A_ADDR_03:%.*]] = phi float* [ [[ADDB]], [[A]] ], [ [[ADD]], [[BBB]] ]
15+
; CHECK-NEXT: br label [[BB3]]
16+
; CHECK: bb3:
17+
; CHECK-NEXT: [[A_ADDR_03:%.*]] = phi float* [ [[ADDB]], [[BB1]] ], [ [[ADD]], [[BB2]] ]
1818
; CHECK-NEXT: [[I1:%.*]] = load float, float* [[A_ADDR_03]], align 4
1919
; CHECK-NEXT: [[MUL_I:%.*]] = fmul float [[I1]], 4.200000e+01
2020
; CHECK-NEXT: store float [[MUL_I]], float* [[A_ADDR_03]], align 4
@@ -27,16 +27,16 @@ entry:
2727

2828
%addb = getelementptr inbounds float, float* %b, i64 2
2929
%addb.int = ptrtoint float* %addb to i64
30-
br i1 %cmp1, label %A, label %Bbb
31-
A:
32-
br label %C
33-
Bbb:
30+
br i1 %cmp1, label %bb1, label %bb2
31+
bb1:
32+
br label %bb3
33+
bb2:
3434
store float 1.0e+01, float* %add, align 4
35-
br label %C
35+
br label %bb3
3636

37-
C:
38-
%a.addr.03 = phi float* [ %addb, %A ], [ %add, %Bbb ]
39-
%b.addr.02 = phi i64 [ %addb.int, %A ], [ %add.int, %Bbb ]
37+
bb3:
38+
%a.addr.03 = phi float* [ %addb, %bb1 ], [ %add, %bb2 ]
39+
%b.addr.02 = phi i64 [ %addb.int, %bb1 ], [ %add.int, %bb2 ]
4040
%i0 = inttoptr i64 %b.addr.02 to float*
4141
%i1 = load float, float* %i0, align 4
4242
%mul.i = fmul float %i1, 4.200000e+01
@@ -48,18 +48,20 @@ define void @no_matching_phi(i64 %a, float* %b, i1 %cond) {
4848
; CHECK-LABEL: @no_matching_phi(
4949
; CHECK-NEXT: entry:
5050
; CHECK-NEXT: [[ADD_INT:%.*]] = add i64 [[A:%.*]], 1
51-
; CHECK-NEXT: [[ADD:%.*]] = inttoptr i64 [[ADD_INT]] to float*
5251
; CHECK-NEXT: [[ADDB:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 2
5352
; CHECK-NEXT: br i1 [[COND:%.*]], label [[B:%.*]], label [[A:%.*]]
5453
; CHECK: A:
5554
; CHECK-NEXT: br label [[C:%.*]]
5655
; CHECK: B:
56+
; CHECK-NEXT: [[ADDB_INT:%.*]] = ptrtoint float* [[ADDB]] to i64
57+
; CHECK-NEXT: [[ADD:%.*]] = inttoptr i64 [[ADD_INT]] to float*
5758
; CHECK-NEXT: store float 1.000000e+01, float* [[ADD]], align 4
5859
; CHECK-NEXT: br label [[C]]
5960
; CHECK: C:
6061
; CHECK-NEXT: [[A_ADDR_03:%.*]] = phi float* [ [[ADDB]], [[A]] ], [ [[ADD]], [[B]] ]
61-
; CHECK-NEXT: [[B_ADDR_02_PTR:%.*]] = phi float* [ [[ADD]], [[A]] ], [ [[ADDB]], [[B]] ]
62-
; CHECK-NEXT: [[I1:%.*]] = load float, float* [[B_ADDR_02_PTR]], align 4
62+
; CHECK-NEXT: [[B_ADDR_02:%.*]] = phi i64 [ [[ADD_INT]], [[A]] ], [ [[ADDB_INT]], [[B]] ]
63+
; CHECK-NEXT: [[I0:%.*]] = inttoptr i64 [[B_ADDR_02]] to float*
64+
; CHECK-NEXT: [[I1:%.*]] = load float, float* [[I0]], align 4
6365
; CHECK-NEXT: [[MUL_I:%.*]] = fmul float [[I1]], 4.200000e+01
6466
; CHECK-NEXT: store float [[MUL_I]], float* [[A_ADDR_03]], align 4
6567
; CHECK-NEXT: ret void

llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,17 @@ define void @bar(i1 %flag) #0 !dbg !4 {
1111
; CHECK-NEXT: entry:
1212
; CHECK-NEXT: [[TEXT:%.*]] = alloca [1 x i8], align 1
1313
; CHECK-NEXT: [[BUFF:%.*]] = alloca [1 x i8], align 1
14-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[TEXT]], i64 0, i64 0
15-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[BUFF]], i64 0, i64 0
1614
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
1715
; CHECK: if:
1816
; CHECK-NEXT: br label [[BB2:%.*]]
1917
; CHECK: bb2:
2018
; CHECK-NEXT: br label [[BB3:%.*]]
2119
; CHECK: bb3:
22-
; CHECK-NEXT: call void @llvm.dbg.declare(metadata [1 x i8]* [[TEXT]], [[META16:metadata !.*]], metadata !DIExpression()), [[DBG24:!dbg !.*]]
20+
; CHECK-NEXT: call void @llvm.dbg.declare(metadata [1 x i8]* [[TEXT]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]]
2321
; CHECK-NEXT: br label [[FIN:%.*]]
2422
; CHECK: else:
23+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[TEXT]], i64 0, i64 0
24+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[BUFF]], i64 0, i64 0
2525
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP0]])
2626
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP1]])
2727
; CHECK-NEXT: call void @foo(i8* [[TMP1]], i8* [[TMP0]])

llvm/test/Transforms/InstCombine/lifetime.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,17 @@ define void @bar(i1 %flag) !dbg !4 {
1111
; CHECK-NEXT: entry:
1212
; CHECK-NEXT: [[TEXT:%.*]] = alloca [1 x i8], align 1
1313
; CHECK-NEXT: [[BUFF:%.*]] = alloca [1 x i8], align 1
14-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[TEXT]], i64 0, i64 0
15-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[BUFF]], i64 0, i64 0
1614
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
1715
; CHECK: if:
1816
; CHECK-NEXT: br label [[BB2:%.*]]
1917
; CHECK: bb2:
2018
; CHECK-NEXT: br label [[BB3:%.*]]
2119
; CHECK: bb3:
22-
; CHECK-NEXT: call void @llvm.dbg.declare(metadata [1 x i8]* [[TEXT]], [[META16:metadata !.*]], metadata !DIExpression()), [[DBG24:!dbg !.*]]
20+
; CHECK-NEXT: call void @llvm.dbg.declare(metadata [1 x i8]* [[TEXT]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]]
2321
; CHECK-NEXT: br label [[FIN:%.*]]
2422
; CHECK: else:
23+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[TEXT]], i64 0, i64 0
24+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[BUFF]], i64 0, i64 0
2525
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull [[TMP0]])
2626
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull [[TMP1]])
2727
; CHECK-NEXT: call void @foo(i8* nonnull [[TMP1]], i8* nonnull [[TMP0]])

llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ define void @_Z4testv() {
1515
; CHECK-NEXT: [[I:%.*]] = load i8, i8* @var_7, align 1
1616
; CHECK-NEXT: [[I1:%.*]] = icmp eq i8 [[I]], -1
1717
; CHECK-NEXT: [[I4:%.*]] = load i16, i16* @var_0, align 2
18-
; CHECK-NEXT: [[I8:%.*]] = sext i16 [[I4]] to i32
1918
; CHECK-NEXT: br i1 [[I1]], label [[BB10:%.*]], label [[BB9:%.*]]
2019
; CHECK: bb9:
2120
; CHECK-NEXT: br label [[BB12:%.*]]
@@ -31,6 +30,7 @@ define void @_Z4testv() {
3130
; CHECK-NEXT: [[STOREMERGE1:%.*]] = phi i32 [ [[I11]], [[BB10]] ], [ 1, [[BB9]] ]
3231
; CHECK-NEXT: store i32 [[STOREMERGE1]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @arr_2, i64 0, i64 0), align 4
3332
; CHECK-NEXT: store i16 [[I4]], i16* getelementptr inbounds ([0 x i16], [0 x i16]* @arr_4, i64 0, i64 0), align 2
33+
; CHECK-NEXT: [[I8:%.*]] = sext i16 [[I4]] to i32
3434
; CHECK-NEXT: store i32 [[I8]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @arr_3, i64 0, i64 0), align 16
3535
; CHECK-NEXT: store i32 [[STOREMERGE1]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @arr_2, i64 0, i64 1), align 4
3636
; CHECK-NEXT: store i16 [[I4]], i16* getelementptr inbounds ([0 x i16], [0 x i16]* @arr_4, i64 0, i64 1), align 2

llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,11 @@ define void @f(i1 %cond) {
1414
; CHECK-LABEL: @f(
1515
; CHECK-NEXT: bb0:
1616
; CHECK-NEXT: [[T12:%.*]] = alloca [2 x i32], align 8
17-
; CHECK-NEXT: [[T12_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[T12]], i16 0, i16 0
1817
; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
1918
; CHECK: bb1:
2019
; CHECK-NEXT: unreachable
2120
; CHECK: bb2:
21+
; CHECK-NEXT: [[T12_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[T12]], i16 0, i16 0
2222
; CHECK-NEXT: [[T9:%.*]] = load i16*, i16** @b, align 2
2323
; CHECK-NEXT: store i16 0, i16* [[T9]], align 2
2424
; CHECK-NEXT: [[T10:%.*]] = load i32, i32* [[T12_SUB]], align 8

llvm/test/Transforms/InstCombine/shift-by-signext.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,11 @@ define <2 x i32> @t5_vec_ashr(<2 x i32> %x, <2 x i8> %shamt) {
6969
define i32 @t6_twoshifts(i32 %x, i8 %shamt) {
7070
; CHECK-LABEL: @t6_twoshifts(
7171
; CHECK-NEXT: bb:
72-
; CHECK-NEXT: [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32
7372
; CHECK-NEXT: br label [[WORK:%.*]]
7473
; CHECK: work:
7574
; CHECK-NEXT: br label [[END:%.*]]
7675
; CHECK: end:
76+
; CHECK-NEXT: [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32
7777
; CHECK-NEXT: [[N0:%.*]] = shl i32 [[X:%.*]], [[SHAMT_WIDE]]
7878
; CHECK-NEXT: [[R:%.*]] = ashr i32 [[N0]], [[SHAMT_WIDE]]
7979
; CHECK-NEXT: ret i32 [[R]]
@@ -151,11 +151,11 @@ define i32 @n11_extrause(i32 %x, i8 %shamt) {
151151
}
152152
define i32 @n12_twoshifts_and_extrause(i32 %x, i8 %shamt) {
153153
; CHECK-LABEL: @n12_twoshifts_and_extrause(
154-
; CHECK-NEXT: [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32
155154
; CHECK-NEXT: br label [[WORK:%.*]]
156155
; CHECK: work:
157156
; CHECK-NEXT: br label [[END:%.*]]
158157
; CHECK: end:
158+
; CHECK-NEXT: [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32
159159
; CHECK-NEXT: [[N0:%.*]] = shl i32 [[X:%.*]], [[SHAMT_WIDE]]
160160
; CHECK-NEXT: [[R:%.*]] = ashr i32 [[N0]], [[SHAMT_WIDE]]
161161
; CHECK-NEXT: call void @use32(i32 [[SHAMT_WIDE]])

llvm/test/Transforms/InstCombine/sink_instruction.ll

+3-4
Original file line numberDiff line numberDiff line change
@@ -241,16 +241,15 @@ else:
241241
declare void @abort()
242242
declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64)
243243
declare void @dummy(i64)
244-
; Todo: Two uses in two different users of a single successor block. We can sink.
244+
; Two uses in two different users of a single successor block. We can sink.
245245
define i64 @test8(i64 %c) {
246246
; CHECK-LABEL: @test8(
247247
; CHECK-NEXT: bb1:
248248
; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ugt i64 [[C:%.*]], 2305843009213693951
249-
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[OVERFLOW]], i64 0, i64 8
250249
; CHECK-NEXT: br i1 [[OVERFLOW]], label [[ABORT:%.*]], label [[BB2:%.*]]
251250
; CHECK: bb2:
252-
; CHECK-NEXT: call void @dummy(i64 [[SELECT]])
253-
; CHECK-NEXT: ret i64 [[SELECT]]
251+
; CHECK-NEXT: call void @dummy(i64 8)
252+
; CHECK-NEXT: ret i64 8
254253
; CHECK: abort:
255254
; CHECK-NEXT: call void @abort()
256255
; CHECK-NEXT: unreachable

llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -33,20 +33,20 @@ define i32 @unroll(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %
3333
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL]], align 4
3434
; CHECK-NEXT: [[MUL_EPIL:%.*]] = mul nsw i32 [[TMP3]], [[TMP2]]
3535
; CHECK-NEXT: [[ADD_EPIL:%.*]] = add nsw i32 [[MUL_EPIL]], [[C_010_UNR]]
36-
; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 1
3736
; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 1
3837
; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA:%.*]], label [[FOR_BODY_EPIL_1:%.*]]
3938
; CHECK: for.body.epil.1:
39+
; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 1
4040
; CHECK-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_EPIL]]
4141
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4
4242
; CHECK-NEXT: [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_EPIL]]
4343
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_1]], align 4
4444
; CHECK-NEXT: [[MUL_EPIL_1:%.*]] = mul nsw i32 [[TMP5]], [[TMP4]]
4545
; CHECK-NEXT: [[ADD_EPIL_1:%.*]] = add nsw i32 [[MUL_EPIL_1]], [[ADD_EPIL]]
46-
; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 2
4746
; CHECK-NEXT: [[EPIL_ITER_CMP_1_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 2
4847
; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]], label [[FOR_BODY_EPIL_2:%.*]]
4948
; CHECK: for.body.epil.2:
49+
; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 2
5050
; CHECK-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
5151
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4
5252
; CHECK-NEXT: [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_EPIL_1]]

0 commit comments

Comments
 (0)