Skip to content

Commit c1cf2c8

Browse files
bcheng0127igcbot
authored andcommitted
BCR tuning
BCR tuning
1 parent d55c714 commit c1cf2c8

File tree

6 files changed

+51
-20
lines changed

6 files changed

+51
-20
lines changed

visa/G4_Kernel.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,12 @@ void G4_Kernel::calculateSimdSize() {
618618
computeChannelSlicing();
619619
}
620620

621+
bool G4_Kernel::reachMaxGRF() {
622+
if (numRegTotal >= grfMode.getMaxGRF())
623+
return true;
624+
return false;
625+
}
626+
621627
//
622628
// Updates kernel's related structures to large GRF
623629
//

visa/G4_Kernel.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,7 @@ class G4_Kernel {
691691
// Points the kernel's name at the caller-supplied C string. The pointer is
// stored as-is; no copy of the characters is made.
void setName(const char *n) {
  name = n;
}
692692
// Returns the name pointer currently stored on the kernel.
const char *getName() const {
  return name;
}
693693

694+
bool reachMaxGRF();
694695
bool updateKernelToLargerGRF();
695696
void updateKernelByRegPressure(unsigned regPressure);
696697
bool updateKernelFromNumGRFAttr();

visa/GraphColor.cpp

+32-18
Original file line numberDiff line numberDiff line change
@@ -1022,7 +1022,7 @@ void BankConflictPass::setupBankConflictsForBBTGL(G4_BB *bb,
10221022
} else {
10231023
setupBankConflictsforMad(inst);
10241024
}
1025-
} else if (gra.kernel.getOption(vISA_forceBCR) && !forGlobal &&
1025+
} else if (gra.forceBCR && !forGlobal &&
10261026
inst->getNumSrc() == 2) {
10271027
threeSourceInstNum++;
10281028
setupBankConflictsforMad(inst);
@@ -3499,7 +3499,7 @@ bool Augmentation::markNonDefaultMaskDef() {
34993499
nonDefaultMaskDefFound = true;
35003500
}
35013501

3502-
if (kernel.getOption(vISA_forceBCR) &&
3502+
if ((gra.favorBCR || gra.forceBCR) &&
35033503
gra.getBankConflict(dcl) != BANK_CONFLICT_NONE) {
35043504
gra.setAugmentationMask(dcl, AugmentationMasks::NonDefault);
35053505
nonDefaultMaskDefFound = true;
@@ -6723,7 +6723,7 @@ bool GraphColor::assignColors(ColorHeuristic colorHeuristicGRF,
67236723
// pass) then abort on spill
67246724
//
67256725
if ((heuristic == ROUND_ROBIN ||
6726-
(doBankConflict && !kernel.getOption(vISA_forceBCR))) &&
6726+
(doBankConflict && !gra.forceBCR)) &&
67276727
(lr->getRegKind() == G4_GRF || lr->getRegKind() == G4_FLAG)) {
67286728
return false;
67296729
} else if (kernel.fg.isPseudoDcl(dcl)) {
@@ -7345,11 +7345,9 @@ bool GraphColor::regAlloc(bool doBankConflictReduction,
73457345
return false;
73467346
}
73477347

7348-
if (!kernel.getOption(vISA_forceBCR)) {
7349-
if (!success && doBankConflictReduction) {
7350-
resetTemporaryRegisterAssignments();
7351-
assignColors(FIRST_FIT);
7352-
}
7348+
if (!success && doBankConflictReduction && !gra.forceBCR) {
7349+
resetTemporaryRegisterAssignments();
7350+
assignColors(FIRST_FIT);
73537351
}
73547352
}
73557353
} else {
@@ -10111,6 +10109,16 @@ bool GlobalRA::tryHybridRA() {
1011110109
return true;
1011210110
}
1011310111

10112+
//Skip hybridRA if BCR is needed
10113+
if (favorBCR) {
10114+
lra.undoLocalRAAssignments(true);
10115+
// Restore alignment in case LRA modified it
10116+
copyAlignment();
10117+
//reset favorBCR
10118+
favorBCR = false;
10119+
return false;
10120+
}
10121+
1011410122
if (useHybridRAwithSpill) {
1011510123
insertPhyRegDecls();
1011610124
} else {
@@ -10431,14 +10439,14 @@ std::pair<bool, bool> GlobalRA::bankConflict() {
1043110439
bool doBankConflictReduction = false, highInternalConflict = false;
1043210440
if (builder.getOption(vISA_LocalBankConflictReduction) &&
1043310441
builder.hasBankCollision()) {
10434-
bool reduceBCInRR = false;
10435-
bool reduceBCInTAandFF = false;
10442+
bool reduceBC = false;
10443+
bool threeSouceBCR = false;
1043610444
BankConflictPass bc(*this, true);
1043710445

10438-
reduceBCInRR = bc.setupBankConflictsForKernel(
10439-
true, reduceBCInTAandFF, SECOND_HALF_BANK_START_GRF * 2,
10446+
reduceBC = bc.setupBankConflictsForKernel(
10447+
true, threeSouceBCR, SECOND_HALF_BANK_START_GRF * 2,
1044010448
highInternalConflict);
10441-
doBankConflictReduction = reduceBCInRR && reduceBCInTAandFF;
10449+
doBankConflictReduction = reduceBC && threeSouceBCR;
1044210450
}
1044310451
return std::make_pair(doBankConflictReduction, highInternalConflict);
1044410452
}
@@ -10769,11 +10777,8 @@ GlobalRA::insertSpillCode(bool enableSpillSpaceCompression,
1076910777

1077010778
bool GlobalRA::rerunGRAIter(bool rerunGRA)
1077110779
{
10772-
if (getIterNo() == 0 && (rerunGRA || kernel.getOption(vISA_forceBCR))) {
10773-
if (kernel.getOption(vISA_forceBCR)) {
10774-
// FIXME: We shouldn't modify options. Use local bool flag instead.
10775-
kernel.getOptions()->setOption(vISA_forceBCR, false);
10776-
}
10780+
if (getIterNo() == 0 && (rerunGRA || forceBCR)) {
10781+
forceBCR = false;
1077710782
return true;
1077810783
}
1077910784
return false;
@@ -10991,6 +10996,15 @@ int GlobalRA::coloringRegAlloc() {
1099110996
if (!fastCompile) {
1099210997
rpe.run();
1099310998
writeVerboseRPEStats(rpe);
10999+
11000+
// If the register pressure is less than TotalGRF - 16, mostly the
11001+
// BCR register allocation will succeed. In this case we favor BCR
11002+
// register allocation in augmentation and assignColors and VRT GRF
11003+
// increasing
11004+
favorBCR |= doBankConflictReduction && kernel.useAutoGRFSelection() &&
11005+
builder.favorBCR() && kernel.getOption(vISA_RoundRobin) &&
11006+
!hasStackCall &&
11007+
(rpe.getMaxRP() < kernel.getNumRegTotal() - 16);
1099411008
}
1099511009
GraphColor coloring(liveAnalysis, false, forceSpill);
1099611010

visa/GraphColor.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -1485,6 +1485,8 @@ class GlobalRA {
14851485
bool useFastRA = false;
14861486
bool useHybridRAwithSpill = false;
14871487
bool useLocalRA = false;
1488+
bool favorBCR = false;
1489+
bool forceBCR = false;
14881490
uint32_t nextSpillOffset = 0;
14891491
uint32_t scratchOffset = 0;
14901492

@@ -1918,7 +1920,7 @@ class GlobalRA {
19181920
if (kernel.getOptions()->getOption(vISA_VerifyAugmentation)) {
19191921
verifyAugmentation = std::make_unique<VerifyAugmentation>();
19201922
}
1921-
1923+
forceBCR = kernel.getOption(vISA_forceBCR);
19221924
// Set callWA condition.
19231925
// Call return ip and mask need wa only for non-entry functions. As call
19241926
// WA also needs a temp, we conservatively add WA for

visa/HWCaps.inc

+4
Original file line numberDiff line numberDiff line change
@@ -881,4 +881,8 @@ bool hasMulMacRSIssue() const {
881881

882882
return VISA_WA_CHECK(getPWaTable(), Wa_18035690555);
883883
}
884+
885+
// Capability hook consulted (as builder.favorBCR()) by the register
// allocator when deciding whether to favor bank-conflict-reduction
// allocation. It unconditionally returns false in this revision.
// NOTE(review): hasMulMacRSIssue() just above is const-qualified while this
// query is not -- confirm whether the missing const is intentional.
bool favorBCR() { return false; }
884888
// end HW capabilities

visa/LocalRA.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ bool LocalRA::localRA() {
428428
}
429429

430430
if (!doRoundRobin) {
431-
if (kernel.getOption(vISA_forceBCR) && doBCR) {
431+
if (gra.forceBCR && doBCR) {
432432
RA_TRACE(std::cout << "\t--first-fit BCR RA\n");
433433
needGlobalRA = localRAPass(false, doSplitLLR);
434434
}
@@ -444,6 +444,10 @@ bool LocalRA::localRA() {
444444
specialAlign();
445445
needGlobalRA = localRAPass(false, doSplitLLR);
446446
}
447+
gra.favorBCR |= doBCR && kernel.useAutoGRFSelection() &&
448+
builder.favorBCR() && kernel.getOption(vISA_RoundRobin) &&
449+
!kernel.fg.getHasStackCalls() &&
450+
!kernel.fg.getIsStackCallFunc();
447451
}
448452

449453
if (needGlobalRA == false) {

0 commit comments

Comments
 (0)