Skip to content

Commit 6515db2

Browse files
committed
[InstCombine] Simplify cttz/ctlz + icmp ugt/ult
Follow-up to D55745, this time handling comparisons with ugt and ult predicates (which are the canonical forms for non-equality predicates). For ctlz we can convert into a simple icmp; for cttz we can convert into a mask check.

Differential Revision: https://reviews.llvm.org/D56355
llvm-svn: 351645
1 parent 0b02907 commit 6515db2

File tree

3 files changed

+88
-37
lines changed

3 files changed

+88
-37
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 66 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2610,8 +2610,9 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
26102610
return I;
26112611
}
26122612

2613-
if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, *C))
2614-
return I;
2613+
if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0)))
2614+
if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C))
2615+
return I;
26152616

26162617
return nullptr;
26172618
}
@@ -2755,14 +2756,10 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
27552756
return nullptr;
27562757
}
27572758

2758-
/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
2759-
Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
2760-
const APInt &C) {
2761-
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0));
2762-
if (!II || !Cmp.isEquality())
2763-
return nullptr;
2764-
2765-
// Handle icmp {eq|ne} <intrinsic>, Constant.
2759+
/// Fold an equality icmp with LLVM intrinsic and constant operand.
2760+
Instruction *InstCombiner::foldICmpEqIntrinsicWithConstant(ICmpInst &Cmp,
2761+
IntrinsicInst *II,
2762+
const APInt &C) {
27662763
Type *Ty = II->getType();
27672764
unsigned BitWidth = C.getBitWidth();
27682765
switch (II->getIntrinsicID()) {
@@ -2822,6 +2819,65 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
28222819
return nullptr;
28232820
}
28242821

2822+
/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
///
/// Equality predicates are forwarded to foldICmpEqIntrinsicWithConstant().
/// For the remaining predicates only ugt/ult comparisons of ctlz and cttz
/// are handled here:
///   ctlz(X) u> C  -->  X u< (1 << (BitWidth - C - 1))    for C < BitWidth
///   ctlz(X) u< C  -->  X u> low (BitWidth - C) bits set  for 1 <= C <= BitWidth
///   cttz(X) u> C  -->  (X & low (C + 1) bits) == 0       for C < BitWidth
///   cttz(X) u< C  -->  (X & low C bits) != 0             for 1 <= C <= BitWidth
///
/// \returns the replacement compare instruction, or nullptr if no fold applies
/// (any other intrinsic, predicate, or out-of-range constant).
2823+
Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
2824+
IntrinsicInst *II,
2825+
const APInt &C) {
2826+
if (Cmp.isEquality())
2827+
return foldICmpEqIntrinsicWithConstant(Cmp, II, C);
2828+
2829+
Type *Ty = II->getType();
2830+
unsigned BitWidth = C.getBitWidth();
2831+
switch (II->getIntrinsicID()) {
2832+
case Intrinsic::ctlz: {
2833+
// ctlz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX < 0b00010000
// (8-bit example.) More than C leading zeros means X is below the single
// bit at index BitWidth - C - 1; C < BitWidth keeps that index in range.
2834+
if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
2835+
unsigned Num = C.getLimitedValue();
2836+
APInt Limit = APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
2837+
return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT,
2838+
II->getArgOperand(0), ConstantInt::get(Ty, Limit));
2839+
}
2840+
2841+
// ctlz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX > 0b00011111
// (8-bit example.) Fewer than C leading zeros means some bit above the low
// BitWidth - C bits is set; 1 <= C <= BitWidth keeps that count in range.
2842+
if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
2843+
C.uge(1) && C.ule(BitWidth)) {
2844+
unsigned Num = C.getLimitedValue();
2845+
APInt Limit = APInt::getLowBitsSet(BitWidth, BitWidth - Num);
2846+
return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT,
2847+
II->getArgOperand(0), ConstantInt::get(Ty, Limit));
2848+
}
2849+
break;
2850+
}
2851+
case Intrinsic::cttz: {
2852+
// Limit to one use to ensure we don't increase instruction count.
// Unlike the ctlz folds above, these folds emit an extra 'and' in addition
// to the new icmp.
2853+
if (!II->hasOneUse())
2854+
return nullptr;
2855+
2856+
// cttz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX & 0b00001111 == 0
// (8-bit example.) More than C trailing zeros means the low C + 1 bits are
// all clear; C < BitWidth keeps the mask width at most BitWidth.
2857+
if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
2858+
APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue() + 1);
2859+
return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ,
2860+
Builder.CreateAnd(II->getArgOperand(0), Mask),
2861+
ConstantInt::getNullValue(Ty));
2862+
}
2863+
2864+
// cttz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX & 0b00000111 != 0
// (8-bit example.) Fewer than C trailing zeros means at least one of the
// low C bits is set.
2865+
if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
2866+
C.uge(1) && C.ule(BitWidth)) {
2867+
APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue());
2868+
return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE,
2869+
Builder.CreateAnd(II->getArgOperand(0), Mask),
2870+
ConstantInt::getNullValue(Ty));
2871+
}
2872+
break;
2873+
}
2874+
default:
2875+
break;
2876+
}
2877+
2878+
// No fold matched.
return nullptr;
2879+
}
2880+
28252881
/// Handle icmp with constant (but not simple integer constant) RHS.
28262882
Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
28272883
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -903,7 +903,10 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
903903
Instruction *foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
904904
BinaryOperator *BO,
905905
const APInt &C);
906-
Instruction *foldICmpIntrinsicWithConstant(ICmpInst &ICI, const APInt &C);
906+
Instruction *foldICmpIntrinsicWithConstant(ICmpInst &ICI, IntrinsicInst *II,
907+
const APInt &C);
908+
Instruction *foldICmpEqIntrinsicWithConstant(ICmpInst &ICI, IntrinsicInst *II,
909+
const APInt &C);
907910

908911
// Helpers of visitSelectInst().
909912
Instruction *foldSelectExtConst(SelectInst &Sel);

llvm/test/Transforms/InstCombine/cmp-intrinsic.ll

Lines changed: 18 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,7 @@ define i1 @ctlz_ugt_zero_i32(i32 %x) {
149149

150150
define i1 @ctlz_ugt_one_i32(i32 %x) {
151151
; CHECK-LABEL: @ctlz_ugt_one_i32(
152-
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
153-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[LZ]], 1
152+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 1073741824
154153
; CHECK-NEXT: ret i1 [[CMP]]
155154
;
156155
%lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
@@ -160,8 +159,7 @@ define i1 @ctlz_ugt_one_i32(i32 %x) {
160159

161160
define i1 @ctlz_ugt_other_i32(i32 %x) {
162161
; CHECK-LABEL: @ctlz_ugt_other_i32(
163-
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
164-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[LZ]], 16
162+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 32768
165163
; CHECK-NEXT: ret i1 [[CMP]]
166164
;
167165
%lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
@@ -173,7 +171,7 @@ define i1 @ctlz_ugt_other_multiuse_i32(i32 %x, i32* %p) {
173171
; CHECK-LABEL: @ctlz_ugt_other_multiuse_i32(
174172
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
175173
; CHECK-NEXT: store i32 [[LZ]], i32* [[P:%.*]], align 4
176-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[LZ]], 16
174+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X]], 32768
177175
; CHECK-NEXT: ret i1 [[CMP]]
178176
;
179177
%lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
@@ -184,8 +182,7 @@ define i1 @ctlz_ugt_other_multiuse_i32(i32 %x, i32* %p) {
184182

185183
define i1 @ctlz_ugt_bw_minus_one_i32(i32 %x) {
186184
; CHECK-LABEL: @ctlz_ugt_bw_minus_one_i32(
187-
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
188-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[LZ]], 31
185+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
189186
; CHECK-NEXT: ret i1 [[CMP]]
190187
;
191188
%lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
@@ -205,8 +202,7 @@ define <2 x i1> @ctlz_ult_one_v2i32(<2 x i32> %x) {
205202

206203
define <2 x i1> @ctlz_ult_other_v2i32(<2 x i32> %x) {
207204
; CHECK-LABEL: @ctlz_ult_other_v2i32(
208-
; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
209-
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[LZ]], <i32 16, i32 16>
205+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
210206
; CHECK-NEXT: ret <2 x i1> [[CMP]]
211207
;
212208
%lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
@@ -218,7 +214,7 @@ define <2 x i1> @ctlz_ult_other_multiuse_v2i32(<2 x i32> %x, <2 x i32>* %p) {
218214
; CHECK-LABEL: @ctlz_ult_other_multiuse_v2i32(
219215
; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
220216
; CHECK-NEXT: store <2 x i32> [[LZ]], <2 x i32>* [[P:%.*]], align 8
221-
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[LZ]], <i32 16, i32 16>
217+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X]], <i32 65535, i32 65535>
222218
; CHECK-NEXT: ret <2 x i1> [[CMP]]
223219
;
224220
%lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
@@ -229,8 +225,7 @@ define <2 x i1> @ctlz_ult_other_multiuse_v2i32(<2 x i32> %x, <2 x i32>* %p) {
229225

230226
define <2 x i1> @ctlz_ult_bw_minus_one_v2i32(<2 x i32> %x) {
231227
; CHECK-LABEL: @ctlz_ult_bw_minus_one_v2i32(
232-
; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
233-
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[LZ]], <i32 31, i32 31>
228+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 1, i32 1>
234229
; CHECK-NEXT: ret <2 x i1> [[CMP]]
235230
;
236231
%lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
@@ -240,8 +235,7 @@ define <2 x i1> @ctlz_ult_bw_minus_one_v2i32(<2 x i32> %x) {
240235

241236
define <2 x i1> @ctlz_ult_bitwidth_v2i32(<2 x i32> %x) {
242237
; CHECK-LABEL: @ctlz_ult_bitwidth_v2i32(
243-
; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
244-
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[LZ]], <i32 32, i32 32>
238+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer
245239
; CHECK-NEXT: ret <2 x i1> [[CMP]]
246240
;
247241
%lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
@@ -359,8 +353,8 @@ define i1 @cttz_ugt_zero_i33(i33 %x) {
359353

360354
define i1 @cttz_ugt_one_i33(i33 %x) {
361355
; CHECK-LABEL: @cttz_ugt_one_i33(
362-
; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
363-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i33 [[TZ]], 1
356+
; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 3
357+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TMP1]], 0
364358
; CHECK-NEXT: ret i1 [[CMP]]
365359
;
366360
%tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
@@ -370,8 +364,8 @@ define i1 @cttz_ugt_one_i33(i33 %x) {
370364

371365
define i1 @cttz_ugt_other_i33(i33 %x) {
372366
; CHECK-LABEL: @cttz_ugt_other_i33(
373-
; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
374-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i33 [[TZ]], 16
367+
; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 131071
368+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TMP1]], 0
375369
; CHECK-NEXT: ret i1 [[CMP]]
376370
;
377371
%tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
@@ -394,8 +388,7 @@ define i1 @cttz_ugt_other_multiuse_i33(i33 %x, i33* %p) {
394388

395389
define i1 @cttz_ugt_bw_minus_one_i33(i33 %x) {
396390
; CHECK-LABEL: @cttz_ugt_bw_minus_one_i33(
397-
; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
398-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i33 [[TZ]], 32
391+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[X:%.*]], 0
399392
; CHECK-NEXT: ret i1 [[CMP]]
400393
;
401394
%tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
@@ -415,8 +408,8 @@ define <2 x i1> @cttz_ult_one_v2i32(<2 x i32> %x) {
415408

416409
define <2 x i1> @cttz_ult_other_v2i32(<2 x i32> %x) {
417410
; CHECK-LABEL: @cttz_ult_other_v2i32(
418-
; CHECK-NEXT: [[TZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false)
419-
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TZ]], <i32 16, i32 16>
411+
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
412+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
420413
; CHECK-NEXT: ret <2 x i1> [[CMP]]
421414
;
422415
%tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)
@@ -439,8 +432,8 @@ define <2 x i1> @cttz_ult_other_multiuse_v2i32(<2 x i32> %x, <2 x i32>* %p) {
439432

440433
define <2 x i1> @cttz_ult_bw_minus_one_v2i32(<2 x i32> %x) {
441434
; CHECK-LABEL: @cttz_ult_bw_minus_one_v2i32(
442-
; CHECK-NEXT: [[TZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false)
443-
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TZ]], <i32 31, i32 31>
435+
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 2147483647, i32 2147483647>
436+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
444437
; CHECK-NEXT: ret <2 x i1> [[CMP]]
445438
;
446439
%tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)
@@ -450,8 +443,7 @@ define <2 x i1> @cttz_ult_bw_minus_one_v2i32(<2 x i32> %x) {
450443

451444
define <2 x i1> @cttz_ult_bitwidth_v2i32(<2 x i32> %x) {
452445
; CHECK-LABEL: @cttz_ult_bitwidth_v2i32(
453-
; CHECK-NEXT: [[TZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false)
454-
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TZ]], <i32 32, i32 32>
446+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer
455447
; CHECK-NEXT: ret <2 x i1> [[CMP]]
456448
;
457449
%tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)

0 commit comments

Comments
 (0)