Skip to content

Commit 1a13bf0

Browse files
eternastudento authored and igcbot committed
Use load_ugm instruction for predication of an untyped atomic
The current implementation uses the send.smpl ld_lz instruction to look up the accumulator value of an untyped atomic. This commit adds a mode that uses the load_ugm instruction instead.
1 parent f80ee10 commit 1a13bf0

File tree

2 files changed

+53
-28
lines changed

2 files changed

+53
-28
lines changed

IGC/Compiler/CustomSafeOptPass.cpp

Lines changed: 52 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7194,21 +7194,24 @@ void InsertBranchOpt::atomicSplitOpt(Function& F, int mode)
71947194
{
71957195
enum Mode
71967196
{
7197-
Disable = 0x0, // Disabled IGC\EnableAtomicBranch = 0x0
7198-
ZeroAdd = BIT(0), // Enabled IGC\EnableAtomicBranch = 0x1
7199-
UMax = BIT(1), // Enabled IGC\EnableAtomicBranch = 0x2
7200-
UMin = BIT(2) // Enabled IGC\EnableAtomicBranch = 0x4
7197+
Disable = 0x0, // Disabled IGC\EnableAtomicBranch = 0x0
7198+
ZeroAdd = BIT(0), // Enabled IGC\EnableAtomicBranch = 0x1
7199+
UMax = BIT(1), // Enabled IGC\EnableAtomicBranch = 0x2
7200+
UMin = BIT(2), // Enabled IGC\EnableAtomicBranch = 0x4
7201+
UntypedUgmLoad = BIT(3) // Enabled IGC\EnableAtomicBranch = 0x8
72017202
};
72027203

7203-
// Allow both modes to be applied
7204-
bool zeroAddMode = ( ( mode & ZeroAdd ) == ZeroAdd );
7205-
bool umaxMode = ( ( mode & UMax ) == UMax );
7206-
bool uminMode = ( ( mode & UMin ) == UMin );
7204+
// Allow several modes to be applied
7205+
const bool zeroAddMode = ( ( mode & ZeroAdd ) == ZeroAdd );
7206+
const bool umaxMode = ( ( mode & UMax ) == UMax );
7207+
const bool uminMode = ( ( mode & UMin ) == UMin );
7208+
const bool untypedUgmLoadMode = ( ( mode & UntypedUgmLoad ) == UntypedUgmLoad );
72077209

7208-
auto createReadFromAtomic = []( IRBuilder<>& builder, Instruction* inst, bool isTyped )
7210+
auto createReadFromAtomic = [=]( IRBuilder<>& builder, Instruction* inst, bool isTyped )
72097211
{
72107212
Constant* zero = ConstantInt::get( inst->getType(), 0 );
72117213
Instruction* NewInst = nullptr;
7214+
72127215
if( isTyped )
72137216
{
72147217
Function* pLdIntrinsic = llvm::GenISAIntrinsic::getDeclaration(
@@ -7228,27 +7231,49 @@ void InsertBranchOpt::atomicSplitOpt(Function& F, int mode)
72287231
{
72297232
std::vector<Type*> types;
72307233
std::vector<Value*> ld_FunctionArgList;
7231-
7234+
Function* pLdIntrinsic;
72327235
Value* resourcePtr = inst->getOperand( 0 );
7233-
types.push_back( IGCLLVM::FixedVectorType::get( builder.getFloatTy(), 4 ) );
7234-
types.push_back( resourcePtr->getType() );//Paired resource
7235-
types.push_back( resourcePtr->getType() );//Resource
72367236

7237+
// Generate load.ugm instruction
7238+
if ( untypedUgmLoadMode )
7239+
{
7240+
alignment_t alignment = (alignment_t) (inst->getType()->getScalarSizeInBits() / 8);
7241+
7242+
types.push_back( IGCLLVM::FixedVectorType::get( builder.getFloatTy(), 4 ) );
7243+
types.push_back( resourcePtr->getType() );
7244+
pLdIntrinsic = GenISAIntrinsic::getDeclaration(
7245+
inst->getModule(),
7246+
GenISAIntrinsic::GenISA_ldrawvector_indexed,
7247+
types );
7248+
7249+
ld_FunctionArgList.push_back( resourcePtr );
7250+
ld_FunctionArgList.push_back( inst->getOperand( 1 ) );
7251+
ld_FunctionArgList.push_back( builder.getInt32( (uint32_t) alignment ) ); // alignment
7252+
ld_FunctionArgList.push_back( builder.getInt1( true ) ); // volatile
7253+
}
7254+
// Generate send.smpl ld_lz instruction
7255+
else
7256+
{
7257+
types.push_back( IGCLLVM::FixedVectorType::get( builder.getFloatTy(), 4 ) );
7258+
types.push_back( resourcePtr->getType() );//Paired resource
7259+
types.push_back( resourcePtr->getType() );//Resource
7260+
7261+
pLdIntrinsic = GenISAIntrinsic::getDeclaration(
7262+
inst->getModule(),
7263+
GenISAIntrinsic::GenISA_ldptr,
7264+
types );
7265+
7266+
ld_FunctionArgList.push_back( inst->getOperand( 1 ) ); //coordinates x
7267+
ld_FunctionArgList.push_back( zero ); //coordinates y
7268+
ld_FunctionArgList.push_back( zero ); //coordinates z
7269+
ld_FunctionArgList.push_back( zero ); //lod
7270+
ld_FunctionArgList.push_back( llvm::UndefValue::get( resourcePtr->getType() ) );
7271+
ld_FunctionArgList.push_back( resourcePtr ); //src buffer
7272+
ld_FunctionArgList.push_back( zero ); //immediate offset u
7273+
ld_FunctionArgList.push_back( zero ); //immediate offset v
7274+
ld_FunctionArgList.push_back( zero ); //immediate offset w
7275+
}
72377276

7238-
Function* pLdIntrinsic = GenISAIntrinsic::getDeclaration(
7239-
inst->getModule(),
7240-
GenISAIntrinsic::GenISA_ldptr,
7241-
types );
7242-
7243-
ld_FunctionArgList.push_back( inst->getOperand( 1 ) ); //coordinates x
7244-
ld_FunctionArgList.push_back( zero ); //coordinates y
7245-
ld_FunctionArgList.push_back( zero ); //coordinates z
7246-
ld_FunctionArgList.push_back( zero ); //lod
7247-
ld_FunctionArgList.push_back( llvm::UndefValue::get( inst->getOperand( 0 )->getType() ) );
7248-
ld_FunctionArgList.push_back( inst->getOperand( 0 ) ); //src buffer
7249-
ld_FunctionArgList.push_back( zero ); //immediate offset u
7250-
ld_FunctionArgList.push_back( zero ); //immediate offset v
7251-
ld_FunctionArgList.push_back( zero ); //immediate offset w
72527277
NewInst = builder.CreateCall( pLdIntrinsic, ld_FunctionArgList );
72537278
}
72547279

IGC/common/igc_flags.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ DECLARE_IGC_REGKEY(debugString, LLVMCommandLine, 0, "applies LLVM com
234234
DECLARE_IGC_REGKEY(debugString, SelectiveHashOptions, 0, "applies options to hash range via string", false)
235235
DECLARE_IGC_REGKEY(bool, DisableDX9LowPrecision, true, "Disables HF in DX9.", false)
236236
DECLARE_IGC_REGKEY(bool, EnablePingPongTextureOpt, true, "Enables the Ping Pong texture optimization which is used only for Compute Shaders for back to back dispatches", false)
237-
DECLARE_IGC_REGKEY(DWORD,EnableAtomicBranch, 0, "Bitmask to enable Atomic branch optimization that predicates atomic with if/else. 1: if Val == 0 ignore iadd/sub/umax 0. 2: checks if memory is lower than Val for umax. 4: checks if memory if greater than Val for umin.", false)
237+
DECLARE_IGC_REGKEY(DWORD,EnableAtomicBranch, 0, "Bitmask to enable Atomic branch optimization that predicates atomic with if/else. 1: if Val == 0 ignore iadd/sub/umax 0. 2: checks if memory is lower than Val for umax. 4: checks if memory if greater than Val for umin. 8: generate load_ugm for untyped atomics, otherwise ld_lz", false)
238238
DECLARE_IGC_REGKEY(bool, EnableThreeWayLoadSpiltOpt, false, "Enable three way load spilt opt.", false)
239239
DECLARE_IGC_REGKEY(bool, EnableSamplerChannelReturn, true, "Setting this to 1/true adds a compiler switch to enable using header to return selective channels from sampler", false)
240240
DECLARE_IGC_REGKEY(bool, EnableThreadCombiningOpt, true, "Enables the thread combining optimization which is used only for Compute Shaders for combining a number of software threads to dispatch smaller number of hardware threads", false)

0 commit comments

Comments
 (0)