Skip to content

Commit 0216b0c

Browse files
[SimplifyCFG] Relax cttz cost check in simplifySwitchOfPowersOfTwo
Allow the `simplifySwitchOfPowersOfTwo` transform to take place on recent X86 targets, where the weighted latency-size cost of the `cttz` intrinsic appears to be 2. Computing the trailing-zero count and indexing into a smaller value table should overall be more efficient than generating a jump table with an indirect branch. Also, let the simplification proceed even if the default case may be reachable.
1 parent 1428624 commit 0216b0c

File tree

2 files changed

+47
-39
lines changed

2 files changed

+47
-39
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7198,8 +7198,10 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
71987198
/// will be transformed to:
71997199
/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
72007200
///
7201-
/// This transformation allows better lowering and could allow transforming into
7202-
/// a lookup table.
7201+
/// This transformation allows better lowering and may transform the switch
7202+
/// instruction into a sequence of bit manipulation and a smaller
7203+
/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7204+
/// address of the jump target and indirectly jumping to it).
72037205
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
72047206
const DataLayout &DL,
72057207
const TargetTransformInfo &TTI) {
@@ -7211,26 +7213,18 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
72117213
!DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
72127214
return false;
72137215

7214-
const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
7215-
IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
7216-
{Condition, ConstantInt::getTrue(Context)}),
7217-
TTI::TCK_SizeAndLatency);
7218-
7219-
if (CttzIntrinsicCost > TTI::TCC_Basic)
7220-
// Inserting intrinsic is too expensive.
7216+
// Ensure trailing zeroes count intrinsic emission is not too expensive.
7217+
IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7218+
{Condition, ConstantInt::getTrue(Context)});
7219+
if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
7220+
TTI::TCC_Basic * 2)
72217221
return false;
72227222

72237223
// Only bother with this optimization if there are more than 3 switch cases.
7224-
// SDAG will only bother creating jump tables for 4 or more cases.
7224+
// SDAG will start emitting jump tables for 4 or more cases.
72257225
if (SI->getNumCases() < 4)
72267226
return false;
72277227

7228-
// We perform this optimization only for switches with
7229-
// unreachable default case.
7230-
// This assumtion will save us from checking if `Condition` is a power of two.
7231-
if (!SI->defaultDestUnreachable())
7232-
return false;
7233-
72347228
// Check that switch cases are powers of two.
72357229
SmallVector<uint64_t, 4> Values;
72367230
for (const auto &Case : SI->cases()) {

llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -60,30 +60,44 @@ return:
6060
ret i32 %p
6161
}
6262

63-
; Check that switch's of powers of two range is not reduced if default case is reachable
63+
; Check that the range of a switch of powers of two is not reduced if the default case is reachable,
64+
; unless Zbb extension is on.
6465
define i32 @switch_of_powers_reachable_default(i32 %x) {
65-
; CHECK-LABEL: @switch_of_powers_reachable_default(
66-
; CHECK-NEXT: entry:
67-
; CHECK-NEXT: switch i32 [[X:%.*]], label [[RETURN:%.*]] [
68-
; CHECK-NEXT: i32 1, label [[BB1:%.*]]
69-
; CHECK-NEXT: i32 8, label [[BB2:%.*]]
70-
; CHECK-NEXT: i32 16, label [[BB3:%.*]]
71-
; CHECK-NEXT: i32 32, label [[BB4:%.*]]
72-
; CHECK-NEXT: i32 64, label [[BB5:%.*]]
73-
; CHECK-NEXT: ]
74-
; CHECK: bb1:
75-
; CHECK-NEXT: br label [[RETURN]]
76-
; CHECK: bb2:
77-
; CHECK-NEXT: br label [[RETURN]]
78-
; CHECK: bb3:
79-
; CHECK-NEXT: br label [[RETURN]]
80-
; CHECK: bb4:
81-
; CHECK-NEXT: br label [[RETURN]]
82-
; CHECK: bb5:
83-
; CHECK-NEXT: br label [[RETURN]]
84-
; CHECK: return:
85-
; CHECK-NEXT: [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ]
86-
; CHECK-NEXT: ret i32 [[P]]
66+
; RV64I-LABEL: @switch_of_powers_reachable_default(
67+
; RV64I-NEXT: entry:
68+
; RV64I-NEXT: switch i32 [[X:%.*]], label [[RETURN:%.*]] [
69+
; RV64I-NEXT: i32 1, label [[BB1:%.*]]
70+
; RV64I-NEXT: i32 8, label [[BB2:%.*]]
71+
; RV64I-NEXT: i32 16, label [[BB3:%.*]]
72+
; RV64I-NEXT: i32 32, label [[BB4:%.*]]
73+
; RV64I-NEXT: i32 64, label [[BB5:%.*]]
74+
; RV64I-NEXT: ]
75+
; RV64I: bb1:
76+
; RV64I-NEXT: br label [[RETURN]]
77+
; RV64I: bb2:
78+
; RV64I-NEXT: br label [[RETURN]]
79+
; RV64I: bb3:
80+
; RV64I-NEXT: br label [[RETURN]]
81+
; RV64I: bb4:
82+
; RV64I-NEXT: br label [[RETURN]]
83+
; RV64I: bb5:
84+
; RV64I-NEXT: br label [[RETURN]]
85+
; RV64I: return:
86+
; RV64I-NEXT: [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ]
87+
; RV64I-NEXT: ret i32 [[P]]
88+
;
89+
; RV64ZBB-LABEL: @switch_of_powers_reachable_default(
90+
; RV64ZBB-NEXT: entry:
91+
; RV64ZBB-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
92+
; RV64ZBB-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 7
93+
; RV64ZBB-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
94+
; RV64ZBB: switch.lookup:
95+
; RV64ZBB-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_reachable_default, i32 0, i32 [[TMP0]]
96+
; RV64ZBB-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
97+
; RV64ZBB-NEXT: br label [[RETURN]]
98+
; RV64ZBB: return:
99+
; RV64ZBB-NEXT: [[P:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ -1, [[ENTRY:%.*]] ]
100+
; RV64ZBB-NEXT: ret i32 [[P]]
87101
;
88102
entry:
89103
switch i32 %x, label %default_case [

0 commit comments

Comments
 (0)