Skip to content

Commit 7fc9203

Browse files
bowenxue-intel and igcbot
authored and committed
IntDivRemIncrementReduction Optimization Pass
Unrolling a loop that contains udiv/urem pairs produces multiple consecutive udiv/urem pairs whose dividend increments by 1. Instead of replacing each pair with the precompiled udiv/urem sequence, reuse the results of the previous pair to simplify the computation. This also works for nested udiv/urem, where the dividend is the quotient of a previous udiv/urem.
1 parent c17fc3b commit 7fc9203

File tree

12 files changed

+960
-11
lines changed

12 files changed

+960
-11
lines changed

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ SPDX-License-Identifier: MIT
9999
#include "Compiler/Optimizer/MCSOptimization.hpp"
100100
#include "Compiler/Optimizer/GatingSimilarSamples.hpp"
101101
#include "Compiler/Optimizer/IntDivConstantReduction.hpp"
102+
#include "Compiler/Optimizer/IntDivRemIncrementReduction.hpp"
102103
#include "Compiler/Optimizer/IntDivRemCombine.hpp"
103104
#include "Compiler/Optimizer/SynchronizationObjectCoalescing.hpp"
104105
#include "Compiler/Optimizer/BarrierControlFlowOptimization.hpp"
@@ -1868,6 +1869,8 @@ void OptimizeIR(CodeGenContext* const pContext)
18681869
mpm.add( createWaveAllJointReduction() );
18691870
}
18701871

1872+
mpm.add(llvm::createEarlyCSEPass());
1873+
18711874
if (IGC_IS_FLAG_ENABLED(EnableIntDivRemCombine)) {
18721875
// simplify rem if the quotient is available
18731876
//
@@ -1887,6 +1890,10 @@ void OptimizeIR(CodeGenContext* const pContext)
18871890
// more efficient sequences of multiplies, shifts, and adds
18881891
mpm.add(createIntDivConstantReductionPass());
18891892
}
1893+
1894+
if (IGC_IS_FLAG_ENABLED(EnableIntDivRemIncrementReduction)) {
1895+
mpm.add(createIntDivRemIncrementReductionPass());
1896+
}
18901897
GFX_ONLY_PASS { mpm.add(createMergeMemFromBranchOptPass()); }
18911898

18921899
if (IGC_IS_FLAG_DISABLED(DisableLoadSinking) &&

IGC/Compiler/CustomSafeOptPass.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,18 +1066,31 @@ void CustomSafeOptPass::visitBfi(llvm::CallInst* inst)
10661066
ConstantInt* offsetV = dyn_cast<ConstantInt>(inst->getOperand(1));
10671067
if (widthV && offsetV)
10681068
{
1069+
ConstantInt* baseV = dyn_cast<ConstantInt>(inst->getOperand(3));
10691070
// transformation is beneficial if src3 is constant or if the offset is zero
1070-
if (isa<ConstantInt>(inst->getOperand(3)) || offsetV->isZero())
1071+
if (baseV || offsetV->isZero())
10711072
{
10721073
unsigned int width = static_cast<unsigned int>(widthV->getZExtValue());
10731074
unsigned int offset = static_cast<unsigned int>(offsetV->getZExtValue());
10741075
unsigned int bitMask = ((1 << width) - 1) << offset;
10751076
IRBuilder<> builder(inst);
10761077
// dst = ((src2 << offset) & bitmask) | (src3 & ~bitmask)
1077-
Value* firstTerm = builder.CreateShl(inst->getOperand(2), offsetV);
1078+
Value* firstTerm = nullptr;
1079+
Value* dst = nullptr;
1080+
if( offset != 0 ) {
1081+
firstTerm = builder.CreateShl(inst->getOperand(2), offsetV);
1082+
} else {
1083+
firstTerm = inst->getOperand(2);
1084+
}
10781085
firstTerm = builder.CreateAnd(firstTerm, builder.getInt32(bitMask));
1079-
Value* secondTerm = builder.CreateAnd(inst->getOperand(3), builder.getInt32(~bitMask));
1080-
Value* dst = builder.CreateOr(firstTerm, secondTerm);
1086+
1087+
if (baseV && baseV->isZero()) {
1088+
dst = firstTerm;
1089+
} else {
1090+
auto* secondTerm = builder.CreateAnd(inst->getOperand(3), builder.getInt32(~bitMask));
1091+
dst = builder.CreateOr(firstTerm, secondTerm);
1092+
}
1093+
10811094
inst->replaceAllUsesWith(dst);
10821095
inst->eraseFromParent();
10831096
}

IGC/Compiler/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ void initializeFCmpPaternMatchPass(llvm::PassRegistry&);
245245
void initializeCodeAssumptionPass(llvm::PassRegistry&);
246246
void initializeIGCInstructionCombiningPassPass(llvm::PassRegistry&);
247247
void initializeIntDivConstantReductionPass(llvm::PassRegistry&);
248+
void initializeIntDivRemIncrementReductionPass(llvm::PassRegistry&);
248249
void initializeIntDivRemCombinePass(llvm::PassRegistry&);
249250
void initializeGenRotatePass(llvm::PassRegistry&);
250251
void initializeSynchronizationObjectCoalescingPass(llvm::PassRegistry&);

IGC/Compiler/Optimizer/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ set(IGC_BUILD__SRC__Optimizer
3636
"${CMAKE_CURRENT_SOURCE_DIR}/BarrierControlFlowOptimization.cpp"
3737
"${CMAKE_CURRENT_SOURCE_DIR}/WaveShuffleIndexSinking.cpp"
3838
"${CMAKE_CURRENT_SOURCE_DIR}/WaveAllJointReduction.cpp"
39+
"${CMAKE_CURRENT_SOURCE_DIR}/IntDivRemIncrementReduction.cpp"
3940
)
4041

4142
set(IGC_BUILD__SRC__Compiler_Optimizer
@@ -63,8 +64,9 @@ set(IGC_BUILD__HDR__Optimizer
6364
"${CMAKE_CURRENT_SOURCE_DIR}/ValueTracker.h"
6465
"${CMAKE_CURRENT_SOURCE_DIR}/RuntimeValueVectorExtractPass.h"
6566
"${CMAKE_CURRENT_SOURCE_DIR}/BarrierControlFlowOptimization.hpp"
66-
"${CMAKE_CURRENT_SOURCE_DIR}/WaveShuffleIndexSinking.cpp"
67+
"${CMAKE_CURRENT_SOURCE_DIR}/WaveShuffleIndexSinking.hpp"
6768
"${CMAKE_CURRENT_SOURCE_DIR}/WaveAllJointReduction.hpp"
69+
"${CMAKE_CURRENT_SOURCE_DIR}/IntDivRemIncrementReduction.hpp"
6870
)
6971

7072
set(IGC_BUILD__HDR__Optimizer

0 commit comments

Comments
 (0)