Skip to content

Commit 66e6668

Browse files
lwesiersigcbot
authored andcommitted
Fix problem in split barrier
Fixed the problem in split barrier when we are using with regular barrier. Case: splitbarrier.signal() regularbarrier() splitbarrier.wait() was causing the hang due assigning the same ID of the barrier in the regular barrier and split barrier. Now, the split barrier will take other ID than the regular one.
1 parent d7a41cf commit 66e6668

File tree

7 files changed

+84
-25
lines changed

7 files changed

+84
-25
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3667,11 +3667,24 @@ void CEncoder::InitVISABuilderOptions(TARGET_PLATFORM VISAPlatform, bool canAbor
36673667

36683668
SetAbortOnSpillThreshold(canAbortOnSpill, AllowSpill);
36693669

3670-
if (context->type == ShaderType::COMPUTE_SHADER ||
3671-
(context->type == ShaderType::OPENCL_SHADER && !(context->getModuleMetaData()->NBarrierCnt > 0))) {
3670+
if (context->allowATOB()) {
36723671
SaveOption(vISA_ActiveThreadsOnlyBarrier, true);
36733672
}
36743673

3674+
if (context->m_instrTypes.hasSplitBarrier &&
3675+
context->m_instrTypes.hasWorkgroupBarrier)
3676+
{
3677+
// The regular and split barrier cannnot share the
3678+
// same ID, because we could have such scenario:
3679+
// splitbarrier.signal();
3680+
// workgroupbarrier();
3681+
// splitbarrier.wait();
3682+
// and this will cause a hang.
3683+
// The split barrier will use the ID 1
3684+
// so, we cannot setup ATOB for this case
3685+
SaveOption(vISA_SplitBarrierID1, true);
3686+
}
3687+
36753688
if (m_program->m_Platform->isCoreChildOf(IGFX_XE3_CORE)) {
36763689
if (uint Val = IGC_GET_FLAG_VALUE(VISASpillAllowed)) {
36773690
context->m_spillAllowed = Val;
@@ -5413,7 +5426,8 @@ void CEncoder::Compile(bool hasSymbolTable, GenXFunctionGroupAnalysis *&pFGA) {
54135426
// always set properly, even if a barrier is used as a part of Inline vISA
54145427
// code only.
54155428
if (jitInfo->numBarriers != 0 && !m_program->m_State.GetHasBarrier()) {
5416-
if (context->getModuleMetaData()->NBarrierCnt > 0 || additionalVISAAsmToLink) {
5429+
if (context->getModuleMetaData()->NBarrierCnt > 0 || additionalVISAAsmToLink ||
5430+
jitInfo->numBarriers > 1) {
54175431
m_program->m_State.SetBarrierNumber(NamedBarriersResolution::AlignNBCnt2BarrierNumber(jitInfo->numBarriers));
54185432
} else {
54195433
m_program->m_State.SetHasBarrier();

IGC/Compiler/CISACodeGen/CheckInstrTypes.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,8 +296,12 @@ void CheckInstrTypes::visitCallInst(CallInst &C) {
296296
g_InstrTypes.numWaveIntrinsics++;
297297
break;
298298
case GenISAIntrinsic::GenISA_threadgroupbarrier:
299+
g_InstrTypes.hasWorkgroupBarrier = true;
299300
g_InstrTypes.numBarrier++;
300301
break;
302+
case GenISAIntrinsic::GenISA_threadgroupbarrier_signal:
303+
g_InstrTypes.hasSplitBarrier = true;
304+
break;
301305
case GenISAIntrinsic::GenISA_is_uniform:
302306
g_InstrTypes.hasUniformAssumptions = true;
303307
break;

IGC/Compiler/CodeGenContext.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,28 @@ int32_t CodeGenContext::getNumThreadsPerEU() const { return -1; }
722722

723723
uint32_t CodeGenContext::getExpGRFSize() const { return 0; }
724724

725+
bool CodeGenContext::allowATOB()
726+
{
727+
bool allow = type == ShaderType::COMPUTE_SHADER ||
728+
(type == ShaderType::OPENCL_SHADER &&
729+
!(getModuleMetaData()->NBarrierCnt > 0));
730+
731+
if (m_instrTypes.hasSplitBarrier &&
732+
m_instrTypes.hasWorkgroupBarrier)
733+
{
734+
// The regular and split barrier cannnot share the
735+
// same ID, because we could have such scenario:
736+
// splitbarrier.signal();
737+
// workgroupbarrier();
738+
// splitbarrier.wait();
739+
// and this will cause a hang.
740+
// The split barrier will use the ID 1
741+
// so, we cannot setup ATOB for this case
742+
allow = false;
743+
}
744+
return allow;
745+
}
746+
725747
/// parameter "returnDefault" controls what to return when
726748
/// there is no user-forced setting
727749
uint32_t CodeGenContext::getNumGRFPerThread(bool returnDefault) {

IGC/Compiler/CodeGenPublic.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,8 @@ struct SInstrTypes {
296296
bool hasDynamicGenericLoadStore{};
297297
bool hasUnmaskedRegion{};
298298
bool hasSLM{};
299+
bool hasWorkgroupBarrier{};
300+
bool hasSplitBarrier{};
299301
unsigned int numCall{};
300302
unsigned int numBarrier{};
301303
unsigned int numLoadStore{};
@@ -1043,6 +1045,8 @@ class CodeGenContext {
10431045
virtual bool isBufferBoundsChecking() const;
10441046
virtual uint64_t getMinimumValidAddress() const;
10451047

1048+
bool allowATOB();
1049+
10461050
UserAddrSpaceMD &getUserAddrSpaceMD() {
10471051
IGC_ASSERT(llvmCtxWrapper);
10481052
return llvmCtxWrapper->m_UserAddrSpaceMD;

visa/BuildIR.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -726,7 +726,7 @@ class IR_Builder {
726726
// are set.
727727
return maxId + 1;
728728
}
729-
void updateBarrier();
729+
730730
void updateNamedBarrier(G4_Operand *barrierId);
731731

732732
G4_Declare *cloneDeclare(std::map<G4_Declare *, G4_Declare *> &dclMap,
@@ -2342,9 +2342,9 @@ class IR_Builder {
23422342

23432343
////////////////////////////////////////////////////////////////////////
23442344
// default barrier functions
2345-
void generateSingleBarrier(G4_Predicate *prd);
2346-
void generateBarrierSend(G4_Predicate *prd);
2347-
void generateBarrierWait(G4_Predicate *prd);
2345+
void generateSingleBarrier(G4_Predicate *prd, uint32_t id);
2346+
void generateBarrierSend(G4_Predicate *prd, uint32_t id);
2347+
void generateBarrierWait(G4_Predicate *prd, uint32_t id);
23482348
int translateVISASplitBarrierInst(G4_Predicate *prd, bool isSignal);
23492349

23502350
////////////////////////////////////////////////////////////////////////

visa/VisaToG4/TranslateSendSync.cpp

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ static void generateNamedBarrier(int &status, IR_Builder &irb,
275275
}
276276

277277

278-
void IR_Builder::generateSingleBarrier(G4_Predicate *prd) {
278+
void IR_Builder::generateSingleBarrier(G4_Predicate *prd, uint32_t id) {
279279
// single barrier: # producer = # consumer = # threads, barrier id = 0
280280
// For now produce no fence
281281
// Number of threads per threadgroup is r0.2[31:24]
@@ -286,12 +286,12 @@ void IR_Builder::generateSingleBarrier(G4_Predicate *prd) {
286286
// Hdr.2:d[31:24,23:16]
287287
G4_Declare *header = createTempVar(8, Type_UD, getGRFAlign());
288288
auto dst = createDst(header->getRegVar(), 0, 2, 1, Type_UD);
289-
uint32_t headerInitValDw2 = 0x0; // initial value for DWord2
289+
uint32_t headerInitValDw2 = id; // initial value for DWord2
290290
if (getPlatform() >= Xe2 && getOption(vISA_ActiveThreadsOnlyBarrier)) {
291291
headerInitValDw2 |= (1 << 8);
292292
}
293293
// Header.2:d has the following format:
294-
// bits[7:0] = 0x0 (barrier id)
294+
// bits[7:0] = id (barrier id)
295295
// bits[8] = active only thread barrier
296296
// bits[15:14] = 0 (producer/consumer)
297297
// bits[23:16] = num producers = r0.11:b (r0.2[31:24] = num threads in tg)
@@ -311,7 +311,6 @@ void IR_Builder::generateSingleBarrier(G4_Predicate *prd) {
311311
createSrc(getBuiltinR0()->getRegVar(), 0, 11, getRegionScalar(), Type_UB);
312312
auto inst1 = createMov(g4::SIMD2, dst, src0, InstOpt_WriteEnable, true);
313313
inst1->addComment("signal barrier payload (nprods, ncons)");
314-
315314
// 1 message length, 0 response length, no header, no ack
316315
int desc = (0x1 << 25) + 0x4;
317316

@@ -534,16 +533,12 @@ int IR_Builder::translateVISAWaitInst(G4_Operand *mask) {
534533
return VISA_SUCCESS;
535534
}
536535

537-
void IR_Builder::updateBarrier() {
538-
// The legacy barrier is always allocated to id 0.
539-
usedBarriers.set(0, true);
540-
}
541-
542-
void IR_Builder::generateBarrierSend(G4_Predicate *prd) {
543-
updateBarrier();
536+
void IR_Builder::generateBarrierSend(G4_Predicate *prd, uint32_t id = 0) {
537+
// The id = 0 is the alias for the regular threadgroup barrier.
538+
usedBarriers.set(id, true);
544539

545540
if (hasUnifiedBarrier()) {
546-
generateSingleBarrier(prd);
541+
generateSingleBarrier(prd, id);
547542
return;
548543
}
549544

@@ -576,8 +571,9 @@ void IR_Builder::generateBarrierSend(G4_Predicate *prd) {
576571
createImm(desc, Type_UD), InstOpt_WriteEnable, msgDesc, true);
577572
}
578573

579-
void IR_Builder::generateBarrierWait(G4_Predicate *prd) {
580-
updateBarrier();
574+
void IR_Builder::generateBarrierWait(G4_Predicate *prd, uint32_t id = 0) {
575+
// The id = 0 is the alias for the regular threadgroup barrier.
576+
usedBarriers.set(id, true);
581577

582578
G4_Operand *waitSrc = nullptr;
583579
if (!hasUnifiedBarrier()) {
@@ -592,8 +588,8 @@ void IR_Builder::generateBarrierWait(G4_Predicate *prd) {
592588
}
593589
} else {
594590
if (getPlatform() >= Xe_PVC) {
595-
// PVC: sync.bar 0
596-
waitSrc = createImm(0, Type_UD);
591+
// PVC: sync.bar id
592+
waitSrc = createImm(id, Type_UD);
597593
} else {
598594
// DG2: sync.bar null
599595
waitSrc = createNullSrc(Type_UD);
@@ -751,10 +747,24 @@ int IR_Builder::translateVISASplitBarrierInst(G4_Predicate *prd,
751747
bool isSignal) {
752748
TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
753749

750+
uint32_t id = 0;
751+
752+
if (getOption(vISA_SplitBarrierID1) &&
753+
getPlatform() >= Xe_PVC) {
754+
// We have a mix usage of the
755+
// workgroupbarrier and splitbarrier.
756+
// We need to split the ID usage:
757+
// workgroupbarrier takes ID:0
758+
// splitbarrier takes ID:1
759+
// to avoid cross usage of the same ID
760+
// and hang on the GPU.
761+
id = 1;
762+
}
763+
754764
if (isSignal) {
755-
generateBarrierSend(prd);
765+
generateBarrierSend(prd, id);
756766
} else {
757-
generateBarrierWait(prd);
767+
generateBarrierWait(prd, id);
758768
}
759769

760770
return VISA_SUCCESS;

visa/include/VISAOptionsDefs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -791,6 +791,11 @@ DEF_VISA_OPTION(vISA_ActiveThreadsOnlyBarrier, ET_BOOL,
791791
"This enables the active-only bit in workgroup barriers. "
792792
"With this option exited threads are not counted in expected "
793793
"arrival count total and will not cause hangs.", false)
794+
DEF_VISA_OPTION(vISA_SplitBarrierID1, ET_BOOL,
795+
"-splitbarrierid1",
796+
"This flag switch ID of the split barrier to 1. "
797+
"After that change, the workgroupbarrier and splitbarrer"
798+
"can work togheter and will not cause hangs.", false)
794799
DEF_VISA_OPTION(vISA_RestrictSrc1ByteSwizzle, ET_BOOL,
795800
"-restrictSrc1ByteSwizzle",
796801
"Enable the WA to restrict src1 byte swizzle case", false)

0 commit comments

Comments
 (0)