Skip to content

Commit b23eb1e

Browse files
bcheng0127 authored and igcbot committed
gather send update
Gather send update
1 parent d7e78d5 commit b23eb1e

File tree

10 files changed

+81
-32
lines changed

10 files changed

+81
-32
lines changed

visa/BuildIRImpl.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2177,7 +2177,9 @@ G4_INST *IR_Builder::createIntrinsicAddrMovInst(
21772177
G4_Operand *src1, G4_Operand *src2, G4_Operand *src3, G4_Operand *src4,
21782178
G4_Operand *src5, G4_Operand *src6, G4_Operand *src7, bool addToInstList) {
21792179
G4_INST *i = nullptr;
2180-
vISA_ASSERT(intrinId == Intrinsic::PseudoAddrMov, "expect pseudo_mov op");
2180+
vISA_ASSERT(intrinId == Intrinsic::PseudoAddrMov ||
2181+
intrinId == Intrinsic::PseudoAddrMovW,
2182+
"expect pseudo_mov op");
21812183

21822184
i = new (mem) G4_PseudoAddrMovIntrinsic(*this, intrinId, dst, src0, src1,
21832185
src2, src3, src4, src5, src6, src7);

visa/G4_IR.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7806,6 +7806,7 @@ void G4_InstIntrinsic::computeRightBound(G4_Operand *opnd) {
78067806
opnd->setRightBound(opnd->left_bound + getDstByteSize() - 1);
78077807
break;
78087808
case Intrinsic::PseudoAddrMov:
7809+
case Intrinsic::PseudoAddrMovW:
78097810
if (opnd != getDst()) { // Source operand only, dst operand will be handled
78107811
// as normal dst
78117812
opnd->setLeftBound(opnd->left_bound +

visa/G4_IR.hpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@ class G4_INST {
349349
bool isFillIntrinsic() const;
350350
G4_FillIntrinsic *asFillIntrinsic() const;
351351
bool isPseudoAddrMovIntrinsic() const;
352+
bool isPseudoAddrMovWIntrinsic() const;
352353
bool isSplitIntrinsic() const;
353354
bool isCallerSave() const;
354355
bool isCallerRestore() const;
@@ -466,12 +467,6 @@ class G4_INST {
466467
return (G4_InstIntrinsic *)this;
467468
}
468469

469-
G4_PseudoAddrMovIntrinsic *asPseudoAddrMovIntrinsic() const {
470-
vISA_ASSERT(isPseudoAddrMovIntrinsic(), "not a fill intrinsic");
471-
return const_cast<G4_PseudoAddrMovIntrinsic *>(
472-
reinterpret_cast<const G4_PseudoAddrMovIntrinsic *>(this));
473-
}
474-
475470
const G4_InstSend *asSendInst() const {
476471
if (!isSend()) {
477472
return nullptr;
@@ -1310,6 +1305,7 @@ class G4_InstSend : public G4_INST {
13101305
}
13111306
}
13121307

1308+
13131309
G4_Operand *getMsgDescOperand() const {
13141310
return isSplitSend() ? srcs[2] : srcs[1];
13151311
}
@@ -1400,6 +1396,7 @@ enum class Intrinsic {
14001396
CalleeRestore,
14011397
FlagSpill,
14021398
PseudoAddrMov,
1399+
PseudoAddrMovW,
14031400
NamedBarrierWA,
14041401
BarrierWA,
14051402
IEEEExceptionTrap,
@@ -1454,11 +1451,14 @@ class G4_InstIntrinsic : public G4_INST {
14541451
{Intrinsic::FlagSpill, "flagSpill", 0, 1, Phase::RA, 0},
14551452
{Intrinsic::PseudoAddrMov, "pseudo_addr_mov", 1, 8,
14561453
Phase::BinaryEncoding, 0},
1454+
{Intrinsic::PseudoAddrMovW, "pseudo_addr_mov_w", 1, 4,
1455+
Phase::BinaryEncoding, 0},
14571456
{Intrinsic::NamedBarrierWA, "namedBarrierWA", 1, 1, Phase::SWSB, 0},
14581457
{Intrinsic::BarrierWA, "barrierWA", 1, 0, Phase::SWSB, 0},
14591458
{Intrinsic::IEEEExceptionTrap, "ieee_exception_trap", 1, 0,
14601459
Phase::SWSB, 0},
1461-
{Intrinsic::Breakpoint, "breakpoint", 0, 0, Phase::SWSB, 1ull << HasSideEffects},
1460+
{Intrinsic::Breakpoint, "breakpoint", 0, 0, Phase::SWSB,
1461+
1ull << HasSideEffects},
14621462
};
14631463

14641464
public:
@@ -1617,7 +1617,13 @@ inline G4_FillIntrinsic *G4_INST::asFillIntrinsic() const {
16171617

16181618
inline bool G4_INST::isPseudoAddrMovIntrinsic() const {
16191619
return isIntrinsic() &&
1620-
asIntrinsicInst()->getIntrinsicId() == Intrinsic::PseudoAddrMov;
1620+
(asIntrinsicInst()->getIntrinsicId() == Intrinsic::PseudoAddrMov ||
1621+
asIntrinsicInst()->getIntrinsicId() == Intrinsic::PseudoAddrMovW);
1622+
}
1623+
1624+
inline bool G4_INST::isPseudoAddrMovWIntrinsic() const {
1625+
return isIntrinsic() &&
1626+
asIntrinsicInst()->getIntrinsicId() == Intrinsic::PseudoAddrMovW;
16211627
}
16221628

16231629
inline bool G4_INST::isSplitIntrinsic() const {

visa/G4_Kernel.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1801,8 +1801,11 @@ void G4_Kernel::emitDeviceAsmInstructionsIga(std::ostream &os,
18011801
const iga::Model *igaModel =
18021802
iga::Model::LookupModel(iga::ToPlatform(igaPlatform));
18031803
iga::SWSB_ENCODE_MODE swsbEncodeMode = igaModel->getSWSBEncodeMode();
1804+
1805+
18041806
KernelView kv(igaPlatform, binary, binarySize, swsbEncodeMode, errBuf,
1805-
ERROR_STRING_MAX_LENGTH);
1807+
ERROR_STRING_MAX_LENGTH
1808+
);
18061809

18071810
if (!kv.decodeSucceeded()) {
18081811
const char *MSG =

visa/GraphColor.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9051,6 +9051,8 @@ void ForbiddenRegs::generateReservedGRFForbidden(
90519051
bool hasStackCall = builder.kernel.fg.getHasStackCalls() ||
90529052
builder.kernel.fg.getIsStackCallFunc();
90539053
uint32_t reservedGRFNum = builder.getuint32Option(vISA_ReservedGRFNum);
9054+
uint32_t reservedFromFrontGRFNum =
9055+
builder.getuint32Option(vISA_ReservedFromFrontGRFNum);
90549056
unsigned int stackCallRegSize =
90559057
hasStackCall ? builder.kernel.stackCall.numReservedABIGRF() : 0;
90569058

@@ -9089,6 +9091,10 @@ void ForbiddenRegs::generateReservedGRFForbidden(
90899091
forbiddenVec[index].set(largestNoneReservedReg - i, true);
90909092
}
90919093

9094+
for (unsigned int i = 0; i < reservedFromFrontGRFNum; i++) {
9095+
forbiddenVec[index].set(i, true);
9096+
}
9097+
90929098
auto &fg = builder.kernel.fg;
90939099
if (fg.reserveSR) {
90949100
forbiddenVec[index].set(

visa/Optimizer.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1309,7 +1309,11 @@ void Optimizer::removePseudoMov() {
13091309
unsigned int regNum =
13101310
(static_cast<G4_Greg *>(regVar->getPhyReg()))->getRegNum();
13111311
regNum += src->asAddrExp()->getOffset() / kernel.getGRFSize();
1312-
value |= (uint64_t)regNum << 8 * i;
1312+
if (inst->isPseudoAddrMovWIntrinsic()) {
1313+
value |= (uint64_t)regNum << (16 * i);
1314+
} else {
1315+
value |= (uint64_t)regNum << (8 * i);
1316+
}
13131317
}
13141318
G4_Imm *src = builder.createImm(value, Type_UQ);
13151319
G4_INST *movInst = builder.createMov(g4::SIMD1, inst->getDst(), src,

visa/Optimizer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -365,9 +365,9 @@ class Optimizer {
365365
PI_preRA_HWWorkaround, // always, each WA under specific control
366366
PI_postRA_HWWorkaround, // always, each WA under specific control
367367
PI_preRA_Schedule,
368-
PI_preRegAlloc, // always
368+
PI_preRegAlloc, // always
369369
PI_regAlloc, // always
370-
PI_removeLifetimeOps, // always
370+
PI_removeLifetimeOps, // always
371371
PI_removeRedundMov, // always
372372
PI_removeEmptyBlocks, // always
373373
PI_insertFallThroughJump, // always

visa/Passes/SRSubstitution.cpp

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ static bool regSortCompareAfterRA(regMapBRA map1, regMapBRA map2) {
3939
return false;
4040
}
4141

42-
void changeToIndirectSend(G4_INST *inst, G4_Declare *s0Var, int totalRegs, IR_Builder &builder) {
42+
void changeToIndirectSend(G4_INST *inst, G4_Declare *s0Var, int totalRegs,
43+
IR_Builder &builder, bool isLargeGRF) {
4344
// Change the send instruction to sendi
4445
G4_InstSend *Send = inst->asSendInst();
4546
G4_SendDescRaw *desc = Send->getMsgDescRaw();
@@ -53,8 +54,9 @@ void changeToIndirectSend(G4_INST *inst, G4_Declare *s0Var, int totalRegs, IR_Bu
5354
inst->setSrc(msgDescImm, 2);
5455

5556
// Replace source 0 with scalar register
56-
G4_SrcRegRegion *headerOpnd = builder.createSrcRegRegion(
57-
Mod_src_undef, IndirGRF, s0Var->getRegVar(), 0, 0, builder.getRegionScalar(), Type_UB);
57+
G4_SrcRegRegion *headerOpnd =
58+
builder.createSrcRegRegion(Mod_src_undef, IndirGRF, s0Var->getRegVar(), 0,
59+
0, builder.getRegionScalar(), Type_UB);
5860
// Replace source 1 with null.
5961
G4_SrcRegRegion *payloadToUse = builder.createNullSrc(Type_UD);
6062

@@ -284,7 +286,7 @@ bool SRSubPass::replaceWithSendi(G4_BB *bb, INST_LIST_ITER instIter,
284286
bb->insertBefore(instIter, movInst);
285287
}
286288

287-
changeToIndirectSend(inst, s0Var, totalRegs, builder);
289+
changeToIndirectSend(inst, s0Var, totalRegs, builder, false);
288290

289291
return true;
290292
}
@@ -431,7 +433,7 @@ void SRSubPass::SRSub(G4_BB *bb) {
431433
// Check if current instruction is the candidate of sendi.
432434
// Recorded as candidate.
433435
bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
434-
regCandidatesBRA &dstSrcRegs) {
436+
regCandidatesBRA &dstSrcRegs) {
435437
if (!inst->isSend()) {
436438
return false;
437439
}
@@ -460,7 +462,7 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
460462
return false;
461463
}
462464

463-
//The size of LSC src0 and src1 may not be GRF aligned.
465+
// The size of LSC src0 and src1 may not be GRF aligned.
464466
if (inst->getMsgDesc()->getSrc1LenBytes() % builder.getGRFSize() != 0 ||
465467
inst->getMsgDesc()->getSrc0LenBytes() % builder.getGRFSize() != 0) {
466468
return false;
@@ -748,11 +750,12 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
748750
return true;
749751
}
750752

753+
751754
// Replace the send instruction with the payload of
752755
// Insert the scalar register initialization mov instructions.
753756
bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
754-
INST_LIST_ITER instIter,
755-
regCandidatesBRA &dstSrcRegs) {
757+
INST_LIST_ITER instIter,
758+
regCandidatesBRA &dstSrcRegs) {
756759
G4_INST *inst = *instIter;
757760
std::vector<G4_AddrExp *> srcs;
758761
G4_AddrExp *src;
@@ -854,21 +857,38 @@ bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
854857

855858
// Initialize the scalar registers.
856859
uint16_t UQNum = totalRegs > (TypeSize(Type_UQ) / TypeSize(Type_UB)) ? 2 : 1;
860+
if (dstSrcRegs.isLargeGRF) {
861+
UQNum = totalRegs > (TypeSize(Type_UQ) / TypeSize(Type_UW)) ? 2 : 1;
862+
}
857863
G4_Declare *s0Var = builder.createTempScalar(UQNum, "S0_");
858864
s0Var->getRegVar()->setPhyReg(builder.phyregpool.getScalarReg(), 0);
859865
G4_DstRegRegion *dst =
860866
builder.createDst(s0Var->getRegVar(), 0, 0, 1, Type_UQ);
861-
G4_INST *movInst = builder.createIntrinsicAddrMovInst(
862-
Intrinsic::PseudoAddrMov, dst, srcs[0], srcs[1], srcs[2], srcs[3],
863-
srcs[4], srcs[5], srcs[6], srcs[7], false);
867+
G4_INST *movInst = nullptr;
868+
if (!dstSrcRegs.isLargeGRF) {
869+
movInst = builder.createIntrinsicAddrMovInst(
870+
Intrinsic::PseudoAddrMov, dst, srcs[0], srcs[1], srcs[2], srcs[3],
871+
srcs[4], srcs[5], srcs[6], srcs[7], false);
872+
} else {
873+
movInst = builder.createIntrinsicAddrMovInst(
874+
Intrinsic::PseudoAddrMovW, dst, srcs[0], srcs[1], srcs[2], srcs[3],
875+
nullptr, nullptr, nullptr, nullptr, false);
876+
}
864877
bb->insertBefore(instIter, movInst);
865878

866-
if (totalRegs > 8) {
879+
if (UQNum > 1) {
867880
G4_DstRegRegion *dst1 =
868881
builder.createDst(s0Var->getRegVar(), 0, 1, 1, Type_UQ);
869-
G4_INST *movInst1 = builder.createIntrinsicAddrMovInst(
870-
Intrinsic::PseudoAddrMov, dst1, srcs[8], srcs[9], srcs[10], srcs[11],
871-
srcs[12], srcs[13], srcs[14], nullptr, false);
882+
G4_INST *movInst1 = nullptr;
883+
if (!dstSrcRegs.isLargeGRF) {
884+
movInst1 = builder.createIntrinsicAddrMovInst(
885+
Intrinsic::PseudoAddrMov, dst1, srcs[8], srcs[9], srcs[10], srcs[11],
886+
srcs[12], srcs[13], srcs[14], nullptr, false);
887+
} else {
888+
movInst1 = builder.createIntrinsicAddrMovInst(
889+
Intrinsic::PseudoAddrMovW, dst1, srcs[4], srcs[5], srcs[6], srcs[7],
890+
nullptr, nullptr, nullptr, nullptr, false);
891+
}
872892
bb->insertBefore(instIter, movInst1);
873893
}
874894

@@ -883,7 +903,7 @@ bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
883903
}
884904
}
885905

886-
changeToIndirectSend(inst, s0Var, totalRegs, builder);
906+
changeToIndirectSend(inst, s0Var, totalRegs, builder, dstSrcRegs.isLargeGRF);
887907

888908
return true;
889909
}
@@ -1005,7 +1025,8 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
10051025
int srcRegLB = (*dstSrcRegsIter).opnd->getLinearizedStart();
10061026
int srcRegRB = (*dstSrcRegsIter).opnd->getLinearizedEnd();
10071027
if (!(srcRegRB < dstRegLB || srcRegLB > dstRegRB)) {
1008-
// Register is redefined.
1028+
1029+
// Register is redefined
10091030
dstSrcRegsIter =
10101031
candidates[inst].dstSrcMap.erase(dstSrcRegsIter);
10111032
} else {

visa/Passes/SRSubstitution.hpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ SPDX-License-Identifier: MIT
1515
#include "../PointsToAnalysis.h"
1616

1717
#define MAXIMAL_S0_SRC0_GRF_LENGTH 15
18+
#define MAXIMAL_S0_SRC0_GRF_LENGTH_LARGE_GRF 8
19+
1820
typedef enum _INDIRECT_TYPE {
1921
NO_INDIRECT_SEND = 0,
2022
SAMPLER_MSG_ONLY = 1,
@@ -63,8 +65,9 @@ struct regCandidates {
6365

6466
struct regCandidatesBRA {
6567
int firstDefID;
68+
bool isLargeGRF;
6669
std::vector<regMapBRA> dstSrcMap;
67-
regCandidatesBRA() : firstDefID(-1) { dstSrcMap.clear(); }
70+
regCandidatesBRA() : firstDefID(-1), isLargeGRF(false) { dstSrcMap.clear(); }
6871
};
6972

7073
class SRSubPass {
@@ -108,7 +111,7 @@ class SRSubPassAfterRA {
108111
}
109112
bool isSRCandidateAfterRA(G4_INST *inst, regCandidatesBRA &dstSrcRegs);
110113
bool replaceWithSendiAfterRA(G4_BB *bb, INST_LIST_ITER instIter,
111-
regCandidatesBRA &dstSrcRegs);
114+
regCandidatesBRA &dstSrcRegs);
112115
void SRSubAfterRA(G4_BB *bb);
113116
};
114117

visa/include/VISAOptionsDefs.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,9 @@ DEF_VISA_OPTION(vISA_SpillMemOffset, ET_INT32, "-spilloffset",
367367
"USAGE: -spilloffset <offset>\n", 0)
368368
DEF_VISA_OPTION(vISA_ReservedGRFNum, ET_INT32, "-reservedGRFNum",
369369
"USAGE: -reservedGRFNum <regNum>\n", 0)
370+
DEF_VISA_OPTION(vISA_ReservedFromFrontGRFNum, ET_INT32,
371+
"-reservedFromFrontGRFNum",
372+
"USAGE: -reservedFromFrontGRFNum <regNum>\n", 0)
370373
DEF_VISA_OPTION(vISA_TotalGRFNum, ET_INT32, "-TotalGRFNum",
371374
"USAGE: -TotalGRFNum <regNum>\n", 0)
372375
DEF_VISA_OPTION(vISA_MaxRAIterations, ET_INT32, "-maxRAIterations",

0 commit comments

Comments
 (0)