Skip to content

Commit 5ed8d56

Browse files
committed
Add a field for per thread data prolog offset
Change-Id: Ifad94dcd5aa55715c44d47089ed31a70fb4e5f7f
1 parent 42f0813 commit 5ed8d56

File tree

8 files changed

+34
-11
lines changed

8 files changed

+34
-11
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4623,6 +4623,8 @@ void CEncoder::Compile()
46234623

46244624
pOutput->m_scratchSpaceUsedByGtpin = jitInfo->numBytesScratchGtpin;
46254625

4626+
pOutput->m_offsetToSkipPerThreadDataLoad = jitInfo->offsetToSkipPerThreadDataLoad;
4627+
46264628
COMPILER_TIME_END(m_program->GetContext(), TIME_CG_vISAEmitPass);
46274629
}
46284630

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1667,6 +1667,7 @@ void COpenCLKernel::FillKernel()
16671667
}
16681668

16691669
m_kernelInfo.m_executionEnivronment.HasGlobalAtomics = GetHasGlobalAtomics();
1670+
m_kernelInfo.m_threadPayload.OffsetToSkipPerThreadDataLoad = ProgramOutput()->m_offsetToSkipPerThreadDataLoad;
16701671

16711672
}
16721673

IGC/Compiler/CodeGenPublic.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ namespace IGC
108108
void* m_funcRelocationTable;
109109
unsigned int m_funcRelocationTableSize;
110110
unsigned int m_funcRelocationTableEntries;
111+
unsigned int m_offsetToSkipPerThreadDataLoad = 0;
111112

112113
void Destroy()
113114
{

visa/BinaryEncodingIGA.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -813,6 +813,19 @@ void BinaryEncodingIGA::DoAll()
813813
{
814814
inst.second->setGenOffset(inst.first->getPC());
815815
}
816+
if (kernel.fg.builder->getHasPerThreadProlog())
817+
{
818+
// per thread data load is in the first BB
819+
#if 0
820+
assert(kernel.fg.getNumBB() > 1 && "expect at least one prolog BB");
821+
auto secondBB = *(std::next(kernel.fg.BBs.begin()));
822+
assert(secondBB->size() > 0 && "expect at least one inst in second BB");
823+
auto startInst = *(secondBB->begin());
824+
kernel.fg.builder->getJitInfo()->offsetToSkipPerThreadDataLoad = (uint32_t)startInst->getGenOffset();
825+
#else
826+
kernel.fg.builder->getJitInfo()->offsetToSkipPerThreadDataLoad = 8 * 16;
827+
#endif
828+
}
816829
}
817830

818831
SendDescArg BinaryEncodingIGA::getIGASendDescArg(G4_INST* sendInst) const

visa/BuildIR.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,7 @@ class IR_Builder {
507507
bool hasNullReturnSampler = false;
508508

509509
int perThreadInputSize = 0;
510+
bool hasPerThreadProlog = false;
510511

511512
public:
512513
PreDefinedVars preDefVars;
@@ -607,6 +608,8 @@ class IR_Builder {
607608

608609
// Returns the per-thread input size. The backing member perThreadInputSize is declared as int,
// so the value is implicitly converted to uint32_t on return.
uint32_t getPerThreadInputSize() const { return perThreadInputSize; }
609610
// Stores the per-thread input size. val is implicitly converted from uint32_t to the int
// backing member perThreadInputSize.
void setPerThreadInputSize(uint32_t val) { perThreadInputSize = val; }
611+
// Returns the hasPerThreadProlog flag, which is false until setHasPerThreadProlog() is called.
bool getHasPerThreadProlog() const { return hasPerThreadProlog; }
612+
// Sets the hasPerThreadProlog flag to true. The setter takes no argument, so it can only
// turn the flag on; NOTE(review): no way to clear the flag is visible in this change.
void setHasPerThreadProlog() { hasPerThreadProlog = true; }
610613

611614
bool isOpndAligned( G4_Operand *opnd, unsigned short &offset, int align_byte );
612615

visa/FlowGraph.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -595,9 +595,9 @@ void FlowGraph::constructFlowGraph(INST_LIST& instlist)
595595
// create the entry block of the flow graph
596596
//
597597
G4_BB* fstartBB = NULL;
598-
G4_BB* curr_BB = entryBB = fstartBB = beginBB(labelMap, instlist.front());
598+
G4_BB* curr_BB = fstartBB = beginBB(labelMap, instlist.front());
599599

600-
kernelInfo = new (mem)FuncInfo(UINT_MAX, entryBB, NULL);
600+
kernelInfo = new (mem)FuncInfo(UINT_MAX, curr_BB, NULL);
601601

602602
std::vector<G4_BB*> subroutineStartBB; // needed by handleExit()
603603

@@ -837,7 +837,7 @@ void FlowGraph::constructFlowGraph(INST_LIST& instlist)
837837
// Ensure each block other than entry starts with a label.
838838
for (auto bb : BBs)
839839
{
840-
if (bb != entryBB && !bb->empty())
840+
if (bb != getEntryBB() && !bb->empty())
841841
{
842842
G4_INST *inst = bb->front();
843843
if (inst->isLabel())
@@ -1833,7 +1833,7 @@ void FlowGraph::removeUnreachableBlocks()
18331833
//
18341834
// assign DFS based pre/rpost ids to all blocks in the main program
18351835
//
1836-
doDFS(entryBB, preId);
1836+
doDFS(getEntryBB(), preId);
18371837

18381838
for (BB_LIST_ITER it = BBs.begin(), itEnd = BBs.end(); it != itEnd; ++it)
18391839
{
@@ -1972,7 +1972,7 @@ void FlowGraph::removeRedundantLabels()
19721972
for (BB_LIST_ITER it = BBs.begin(); it != BBs.end();)
19731973
{
19741974
G4_BB* bb = *it;
1975-
if (bb == entryBB)
1975+
if (bb == getEntryBB())
19761976
{
19771977
it++;
19781978
continue;
@@ -4818,7 +4818,7 @@ void FlowGraph::findBackEdges()
48184818
unsigned postID = 0;
48194819
backEdges.clear();
48204820

4821-
DFSTraverse(entryBB, preId, postID, kernelInfo);
4821+
DFSTraverse(getEntryBB(), preId, postID, kernelInfo);
48224822

48234823
for (auto fn : funcInfoTable)
48244824
{
@@ -4860,7 +4860,9 @@ void FlowGraph::findNaturalLoops()
48604860
loopBody.insert(loopBlock->BBBeforeCall());
48614861
}
48624862
}
4863-
else {
4863+
else
4864+
{
4865+
auto entryBB = getEntryBB();
48644866
for (auto predBB : loopBlock->Preds)
48654867
{
48664868
if (!predBB->isInNaturalLoop())

visa/FlowGraph.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -657,7 +657,6 @@ class FlowGraph
657657

658658
private:
659659

660-
G4_BB* entryBB; // entry block
661660
unsigned traversalNum; // used for flow graph traversals
662661
unsigned numBBId; // number of basic blocks
663662
bool reducible; // reducibility of the graph
@@ -859,7 +858,8 @@ class FlowGraph
859858

860859
void preprocess(INST_LIST& instlist);
861860

862-
FlowGraph(INST_LIST_NODE_ALLOCATOR& alloc, G4_Kernel* kernel, Mem_Manager& m) : entryBB(NULL), traversalNum(0), numBBId(0), reducible(true),
861+
FlowGraph(INST_LIST_NODE_ALLOCATOR& alloc, G4_Kernel* kernel, Mem_Manager& m) :
862+
traversalNum(0), numBBId(0), reducible(true),
863863
doIPA(false), hasStackCalls(false), isStackCallFunc(false), autoLabelId(0),
864864
pKernel(kernel), mem(m), instListAlloc(alloc),
865865
builder(NULL), kernelInfo(NULL), globalOpndHT(m), framePtrDcl(NULL), stackPtrDcl(NULL),
@@ -961,8 +961,7 @@ class FlowGraph
961961
void resetLocalDataFlowData();
962962

963963
unsigned getNumBB() const {return numBBId;}
964-
G4_BB* getEntryBB() {return entryBB;}
965-
void setEntryBB(G4_BB *entry) {entryBB = entry;}
964+
// Returns the entry block, defined as the first block in BBs, or nullptr when the graph has
// no blocks yet. The emptiness guard matters: front() on an empty container is undefined
// behavior, and the previous member-backed accessor returned null before a block was created.
G4_BB* getEntryBB() {return BBs.empty() ? nullptr : BBs.front();}
966965

967966
void doFilescopeVarLayout(IR_Builder& builder, DECLARE_LIST& declares, unsigned& fileScopeFrameOffset);
968967
void addFrameSetupDeclares(IR_Builder& builder, PhyRegPool& regPool);

visa/include/JitterDataStruct.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ typedef struct _CM_JIT_INFO {
7474
void* freeGRFInfo;
7575
unsigned int freeGRFInfoSize;
7676
unsigned char numBytesScratchGtpin;
77+
78+
uint32_t offsetToSkipPerThreadDataLoad = 0;
7779
} FINALIZER_INFO;
7880

7981
#endif // _CM_JITTERDATASTRUCT_

0 commit comments

Comments
 (0)