Skip to content

Commit a54712c

Browse files
[LSR] Make canHoistIVInc allow non-integer types (#143707)
canHoistIVInc was restricted to integer types to avoid a crash in isIndexedLoadLegal/isIndexedStoreLegal: when passed a struct type, these functions fail an assertion in getValueType (more precisely, in MVT::getVT, which getValueType calls). Passing AllowUnknown=true to getValueType in these functions avoids the assertion failure: MVT::Other is returned instead, for which TLI->isIndexedLoadLegal should then return false. This means the integer-type check in canHoistIVInc can be removed.
1 parent d57b867 commit a54712c

File tree

3 files changed

+193
-5
lines changed

3 files changed

+193
-5
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -478,12 +478,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
478478
}
479479

480480
bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty) const override {
481-
EVT VT = getTLI()->getValueType(DL, Ty);
481+
EVT VT = getTLI()->getValueType(DL, Ty, /*AllowUnknown=*/true);
482482
return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
483483
}
484484

485485
bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty) const override {
486-
EVT VT = getTLI()->getValueType(DL, Ty);
486+
EVT VT = getTLI()->getValueType(DL, Ty, /*AllowUnknown=*/true);
487487
return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
488488
}
489489

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6008,9 +6008,8 @@ static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
60086008

60096009
Instruction *I = Fixup.UserInst;
60106010
Type *Ty = I->getType();
6011-
return Ty->isIntegerTy() &&
6012-
((isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) ||
6013-
(isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty)));
6011+
return (isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) ||
6012+
(isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty));
60146013
}
60156014

60166015
/// Rewrite all the fixup locations with new values, following the chosen
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=aarch64-none-elf | FileCheck %s
3+
4+
; Check that the load in the loop has postindex addressing, regardless of the
5+
; type or whether the input uses postindex or offset addressing.
6+
7+
define i32 @i32_initially_postidx(ptr %p, i64 %n) {
8+
; CHECK-LABEL: i32_initially_postidx:
9+
; CHECK: // %bb.0: // %entry
10+
; CHECK-NEXT: cmp x1, #1
11+
; CHECK-NEXT: b.lt .LBB0_5
12+
; CHECK-NEXT: // %bb.1: // %for.body.preheader
13+
; CHECK-NEXT: mov w8, wzr
14+
; CHECK-NEXT: .LBB0_2: // %for.body
15+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
16+
; CHECK-NEXT: ldr w9, [x0], #4
17+
; CHECK-NEXT: add w8, w8, w9
18+
; CHECK-NEXT: cmp w8, #0
19+
; CHECK-NEXT: b.lo .LBB0_5
20+
; CHECK-NEXT: // %bb.3: // %for.inc
21+
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
22+
; CHECK-NEXT: subs x1, x1, #1
23+
; CHECK-NEXT: b.ne .LBB0_2
24+
; CHECK-NEXT: // %bb.4: // %cleanup
25+
; CHECK-NEXT: mov w0, w8
26+
; CHECK-NEXT: ret
27+
; CHECK-NEXT: .LBB0_5:
28+
; CHECK-NEXT: mov w8, wzr
29+
; CHECK-NEXT: mov w0, w8
30+
; CHECK-NEXT: ret
31+
entry:
32+
%cmp1 = icmp sgt i64 %n, 0
33+
br i1 %cmp1, label %for.body, label %cleanup
34+
35+
for.body:
36+
%iv = phi i64 [ %iv.next, %for.inc ], [ 0, %entry ]
37+
%accum = phi i32 [ %add, %for.inc ], [ 0, %entry ]
38+
%ptr = phi ptr [ %ptr.next, %for.inc ], [ %p, %entry ]
39+
%val = load i32, ptr %ptr, align 4
40+
%ptr.next = getelementptr inbounds nuw i8, ptr %ptr, i64 4
41+
%add = add i32 %accum, %val
42+
%cmp2 = icmp ult i32 %add, 0
43+
br i1 %cmp2, label %cleanup, label %for.inc
44+
45+
for.inc:
46+
%iv.next = add nuw nsw i64 %iv, 1
47+
%exitcond = icmp eq i64 %iv.next, %n
48+
br i1 %exitcond, label %cleanup, label %for.body
49+
50+
cleanup:
51+
%ret = phi i32 [ 0, %entry ], [ 0, %for.body ], [ %add, %for.inc ]
52+
ret i32 %ret
53+
}
54+
55+
define i32 @i32_initially_offset(ptr %p, i64 %n) {
56+
; CHECK-LABEL: i32_initially_offset:
57+
; CHECK: // %bb.0: // %entry
58+
; CHECK-NEXT: cmp x1, #1
59+
; CHECK-NEXT: b.lt .LBB1_5
60+
; CHECK-NEXT: // %bb.1: // %for.body.preheader
61+
; CHECK-NEXT: mov w8, wzr
62+
; CHECK-NEXT: .LBB1_2: // %for.body
63+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
64+
; CHECK-NEXT: ldr w9, [x0], #4
65+
; CHECK-NEXT: add w8, w8, w9
66+
; CHECK-NEXT: cmp w8, #0
67+
; CHECK-NEXT: b.lo .LBB1_5
68+
; CHECK-NEXT: // %bb.3: // %for.cond
69+
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
70+
; CHECK-NEXT: subs x1, x1, #1
71+
; CHECK-NEXT: b.ne .LBB1_2
72+
; CHECK-NEXT: // %bb.4: // %cleanup
73+
; CHECK-NEXT: mov w0, w8
74+
; CHECK-NEXT: ret
75+
; CHECK-NEXT: .LBB1_5:
76+
; CHECK-NEXT: mov w8, wzr
77+
; CHECK-NEXT: mov w0, w8
78+
; CHECK-NEXT: ret
79+
entry:
80+
%cmp1 = icmp sgt i64 %n, 0
81+
br i1 %cmp1, label %for.body, label %cleanup
82+
83+
for.cond:
84+
%iv.next = add nuw nsw i64 %iv, 1
85+
%exitcond = icmp eq i64 %iv.next, %n
86+
br i1 %exitcond, label %cleanup, label %for.body
87+
88+
for.body:
89+
%iv = phi i64 [ %iv.next, %for.cond ], [ 0, %entry ]
90+
%accum = phi i32 [ %add, %for.cond ], [ 0, %entry ]
91+
%arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %iv
92+
%val = load i32, ptr %arrayidx, align 4
93+
%add = add i32 %accum, %val
94+
%cmp2 = icmp ult i32 %add, 0
95+
br i1 %cmp2, label %cleanup, label %for.cond
96+
97+
cleanup:
98+
%ret = phi i32 [ 0, %entry ], [ 0, %for.body ], [ %add, %for.cond ]
99+
ret i32 %ret
100+
}
101+
102+
define float @float_initially_postidx(ptr %p, i64 %n) {
103+
; CHECK-LABEL: float_initially_postidx:
104+
; CHECK: // %bb.0: // %entry
105+
; CHECK-NEXT: movi d0, #0000000000000000
106+
; CHECK-NEXT: cmp x1, #1
107+
; CHECK-NEXT: b.lt .LBB2_3
108+
; CHECK-NEXT: .LBB2_1: // %for.body
109+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
110+
; CHECK-NEXT: ldr s1, [x0], #4
111+
; CHECK-NEXT: fadd s0, s0, s1
112+
; CHECK-NEXT: fcmp s0, #0.0
113+
; CHECK-NEXT: b.mi .LBB2_4
114+
; CHECK-NEXT: // %bb.2: // %for.inc
115+
; CHECK-NEXT: // in Loop: Header=BB2_1 Depth=1
116+
; CHECK-NEXT: subs x1, x1, #1
117+
; CHECK-NEXT: b.ne .LBB2_1
118+
; CHECK-NEXT: .LBB2_3: // %cleanup
119+
; CHECK-NEXT: ret
120+
; CHECK-NEXT: .LBB2_4:
121+
; CHECK-NEXT: movi d0, #0000000000000000
122+
; CHECK-NEXT: ret
123+
entry:
124+
%cmp1 = icmp sgt i64 %n, 0
125+
br i1 %cmp1, label %for.body, label %cleanup
126+
127+
for.body:
128+
%iv = phi i64 [ %iv.next, %for.inc ], [ 0, %entry ]
129+
%accum = phi float [ %add, %for.inc ], [ 0.000000e+00, %entry ]
130+
%ptr = phi ptr [ %ptr.next, %for.inc ], [ %p, %entry ]
131+
%val = load float, ptr %ptr, align 4
132+
%ptr.next = getelementptr inbounds nuw i8, ptr %ptr, i64 4
133+
%add = fadd float %accum, %val
134+
%cmp2 = fcmp olt float %add, 0.000000e+00
135+
br i1 %cmp2, label %cleanup, label %for.inc
136+
137+
for.inc:
138+
%iv.next = add nuw nsw i64 %iv, 1
139+
%exitcond = icmp eq i64 %iv.next, %n
140+
br i1 %exitcond, label %cleanup, label %for.body
141+
142+
cleanup:
143+
%ret = phi float [ 0.000000e+00, %entry ], [ 0.000000e+00, %for.body ], [ %add, %for.inc ]
144+
ret float %ret
145+
}
146+
147+
define float @float_initially_offset(ptr %p, i64 %n) {
148+
; CHECK-LABEL: float_initially_offset:
149+
; CHECK: // %bb.0: // %entry
150+
; CHECK-NEXT: movi d0, #0000000000000000
151+
; CHECK-NEXT: cmp x1, #1
152+
; CHECK-NEXT: b.lt .LBB3_3
153+
; CHECK-NEXT: .LBB3_1: // %for.body
154+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
155+
; CHECK-NEXT: ldr s1, [x0], #4
156+
; CHECK-NEXT: fadd s0, s0, s1
157+
; CHECK-NEXT: fcmp s0, #0.0
158+
; CHECK-NEXT: b.mi .LBB3_4
159+
; CHECK-NEXT: // %bb.2: // %for.cond
160+
; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
161+
; CHECK-NEXT: subs x1, x1, #1
162+
; CHECK-NEXT: b.ne .LBB3_1
163+
; CHECK-NEXT: .LBB3_3: // %cleanup
164+
; CHECK-NEXT: ret
165+
; CHECK-NEXT: .LBB3_4:
166+
; CHECK-NEXT: movi d0, #0000000000000000
167+
; CHECK-NEXT: ret
168+
entry:
169+
%cmp1 = icmp sgt i64 %n, 0
170+
br i1 %cmp1, label %for.body, label %cleanup
171+
172+
for.cond:
173+
%iv.next = add nuw nsw i64 %iv, 1
174+
%exitcond = icmp eq i64 %iv.next, %n
175+
br i1 %exitcond, label %cleanup, label %for.body
176+
177+
for.body:
178+
%iv = phi i64 [ %iv.next, %for.cond ], [ 0, %entry ]
179+
%accum = phi float [ %add, %for.cond ], [ 0.000000e+00, %entry ]
180+
%arrayidx = getelementptr inbounds nuw float, ptr %p, i64 %iv
181+
%val = load float, ptr %arrayidx, align 4
182+
%add = fadd float %accum, %val
183+
%cmp2 = fcmp olt float %add, 0.000000e+00
184+
br i1 %cmp2, label %cleanup, label %for.cond
185+
186+
cleanup:
187+
%ret = phi float [ 0.000000e+00, %entry ], [ 0.000000e+00, %for.body ], [ %add, %for.cond ]
188+
ret float %ret
189+
}

0 commit comments

Comments
 (0)