Skip to content

Commit 6dfeb96

Browse files
YuriPlyakhin authored and igcbot committed
Predicated load/store: bug fixing, tests
Fixed several bugs and added tests for predicated load/store implementation
1 parent 4042250 commit 6dfeb96

File tree

18 files changed

+1198
-48
lines changed

18 files changed

+1198
-48
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19700,8 +19700,12 @@ void EmitPass::emitLSCVectorLoad_uniform(
1970019700
dSize = 4;
1970119701
vSize = vSize * 2;
1970219702
ldDest = m_currShader->GetNewAlias(ldDest, ISA_TYPE_UD, 0, 0);
19703-
if (mergeVal)
19704-
newMergeVal = m_currShader->GetNewAlias(mergeVal, ISA_TYPE_UD, 0, 0);
19703+
if (mergeVal) {
19704+
if (mergeVal->IsImmediate())
19705+
newMergeVal = m_currShader->ImmToVariable(mergeVal->GetImmediateValue(), ISA_TYPE_UD);
19706+
else
19707+
newMergeVal = m_currShader->GetNewAlias(mergeVal, ISA_TYPE_UD, 0, 0);
19708+
}
1970519709
}
1970619710

1970719711
bool destUniform = Dest->IsUniform();

IGC/Compiler/CISACodeGen/VectorPreProcess.cpp

Lines changed: 12 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -489,7 +489,7 @@ namespace
489489
SmallVector<std::pair<Type*, uint32_t>, 8>& SplitInfo);
490490
// If predicated loads are split, we also need to split merge values
491491
void createSplitMergeValues(
492-
BasicBlock* BB,
492+
Instruction* Inst,
493493
Value* OrigMergeVal,
494494
const SmallVector<std::pair<Type*, uint32_t>, 8>& SplitInfo,
495495
ValVector& NewMergeVals) const;
@@ -695,7 +695,7 @@ void VectorPreProcess::createSplitVectorTypes(
695695
}
696696
}
697697

698-
void VectorPreProcess::createSplitMergeValues(BasicBlock* BB,
698+
void VectorPreProcess::createSplitMergeValues(Instruction* Inst,
699699
Value* OrigMergeVal,
700700
const SmallVector<std::pair<Type*, uint32_t>, 8>& SplitInfo,
701701
ValVector& NewMergeVals) const
@@ -731,10 +731,7 @@ void VectorPreProcess::createSplitMergeValues(BasicBlock* BB,
731731
return;
732732
}
733733

734-
// Init IRBuilder with Basic block, if OrigMergeVal is not an instruction.
735-
std::unique_ptr<IRBuilder<>> Builder = isa<Instruction>(OrigMergeVal) ?
736-
std::make_unique<IRBuilder<>>(cast<Instruction>(OrigMergeVal)) :
737-
std::make_unique<IRBuilder<>>(BB);
734+
IRBuilder<> Builder(Inst);
738735

739736
// Case when we split vector merge value into subvectors. Element type is the same.
740737
// Just one big vector is being split into subvectors.
@@ -756,14 +753,14 @@ void VectorPreProcess::createSplitMergeValues(BasicBlock* BB,
756753
{
757754
for (uint32_t j = 0, e = int_cast<uint32_t>(VTy->getNumElements()); j < e; ++j)
758755
{
759-
Value *Elt = (idx < OrigVTyNEl) ? Builder->CreateExtractElement(OrigMergeVal, Builder->getInt32(idx++)) :
756+
Value *Elt = (idx < OrigVTyNEl) ? Builder.CreateExtractElement(OrigMergeVal, Builder.getInt32(idx++)) :
760757
Constant::getNullValue(VTy->getElementType());
761-
NewMergeVal = Builder->CreateInsertElement(NewMergeVal, Elt, Builder->getInt32(j));
758+
NewMergeVal = Builder.CreateInsertElement(NewMergeVal, Elt, Builder.getInt32(j));
762759
}
763760
}
764761
else
765762
{
766-
NewMergeVal = Builder->CreateExtractValue(OrigMergeVal, idx++);
763+
NewMergeVal = Builder.CreateExtractElement(OrigMergeVal, Builder.getInt32(idx++));
767764
}
768765
NewMergeVals.push_back(NewMergeVal);
769766
}
@@ -775,7 +772,7 @@ void VectorPreProcess::createSplitMergeValues(BasicBlock* BB,
775772
// Case when we change scalar value into vector with smaller element type.
776773
IGC_ASSERT_MESSAGE(SplitInfo.size() == 1, "Unexpected split info!");
777774
IGC_ASSERT_MESSAGE(SplitInfo[0].second == 1, "Unexpected split info!");
778-
Value *NewMergeVal = Builder->CreateBitCast(OrigMergeVal, SplitInfo[0].first);
775+
Value *NewMergeVal = Builder.CreateBitCast(OrigMergeVal, SplitInfo[0].first);
779776
NewMergeVals.push_back(NewMergeVal);
780777
}
781778

@@ -1116,7 +1113,7 @@ bool VectorPreProcess::splitLoad(
11161113

11171114
ValVector splitMergeValues;
11181115
if (isPredLd)
1119-
createSplitMergeValues(LI->getParent(), cast<PredicatedLoadIntrinsic>(LI)->getMergeValue(), splitInfo, splitMergeValues);
1116+
createSplitMergeValues(LI, cast<PredicatedLoadIntrinsic>(LI)->getMergeValue(), splitInfo, splitMergeValues);
11201117

11211118
Value* Addr = ALI.getPointerOperand();
11221119
auto Align = ALI.getAlignment();
@@ -1306,7 +1303,7 @@ bool VectorPreProcess::splitVector3LoadStore(Instruction* Inst)
13061303

13071304
ValVector splitMergeValues;
13081305
if (isPredLoad)
1309-
createSplitMergeValues(Inst->getParent(), cast<PredicatedLoadIntrinsic>(Inst)->getMergeValue(),
1306+
createSplitMergeValues(Inst, cast<PredicatedLoadIntrinsic>(Inst)->getMergeValue(),
13101307
{ {newVTy, 1} }, splitMergeValues);
13111308

13121309
Value* V = ALI->Create(newVTy, isPredLoad ? splitMergeValues[0] : nullptr);
@@ -1323,7 +1320,7 @@ bool VectorPreProcess::splitVector3LoadStore(Instruction* Inst)
13231320

13241321
ValVector splitMergeValues;
13251322
if (isPredLoad)
1326-
createSplitMergeValues(Inst->getParent(), cast<PredicatedLoadIntrinsic>(Inst)->getMergeValue(),
1323+
createSplitMergeValues(Inst, cast<PredicatedLoadIntrinsic>(Inst)->getMergeValue(),
13271324
{ {newVTy, 1}, {eTy, 1} }, splitMergeValues);
13281325

13291326
Value* V2 = ALI->Create(newVTy, isPredLoad ? splitMergeValues[0] : nullptr);
@@ -1778,7 +1775,7 @@ Instruction* VectorPreProcess::simplifyLoadStore(Instruction* Inst)
17781775
bool isPredLoad = isa<PredicatedLoadIntrinsic>(Inst);
17791776
ValVector splitMergeValues;
17801777
if (isPredLoad)
1781-
createSplitMergeValues(Inst->getParent(), cast<PredicatedLoadIntrinsic>(Inst)->getMergeValue(),
1778+
createSplitMergeValues(Inst, cast<PredicatedLoadIntrinsic>(Inst)->getMergeValue(),
17821779
{ {NewVecTy, 1} }, splitMergeValues);
17831780

17841781
NewLI = ALI.Create(NewVecTy, isPredLoad ? splitMergeValues[0] : nullptr);
@@ -1997,7 +1994,7 @@ bool VectorPreProcess::processScalarLoadStore(Function& F)
19971994
bool isPredLd = isa<PredicatedLoadIntrinsic>(inst);
19981995
ValVector splitMergeValues;
19991996
if (isPredLd)
2000-
createSplitMergeValues(inst->getParent(), cast<PredicatedLoadIntrinsic>(inst)->getMergeValue(),
1997+
createSplitMergeValues(inst, cast<PredicatedLoadIntrinsic>(inst)->getMergeValue(),
20011998
{ {newVecTy, 1} }, splitMergeValues);
20021999
Value *MergeVal = isPredLd ? splitMergeValues[0] : nullptr;
20032000

IGC/Compiler/CISACodeGen/VectorProcess.cpp

Lines changed: 42 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -156,7 +156,8 @@ namespace
156156
private:
157157
bool reLayoutLoadStore(Instruction* Inst);
158158
bool optimizeBitCast(BitCastInst* BC);
159-
Value* ProcessMergeValue(BasicBlock *BB, Value* V, Type* NewTy) const;
159+
Value* ProcessMergeValue(Instruction *Inst, Value* V, Type* NewTy,
160+
Type* NewIntETy, Type* NewIntTy) const;
160161

161162
private:
162163
const DataLayout* m_DL;
@@ -372,6 +373,11 @@ bool VectorProcess::reLayoutLoadStore(Instruction* Inst)
372373
newPtr = Builder.CreateBitCast(Ptr, newPtrTy, "vptrcast");
373374
}
374375

376+
// These types are needed when we are dealing with pointers
377+
// and using ptrtoint and inttoptr.
378+
Type* int_eTy = Type::getIntNTy(*m_C, eTyBits);
379+
Type* new_intTy = VTy ? FixedVectorType::get(int_eTy, nelts) : int_eTy;
380+
375381
if (LI || (II && II->getIntrinsicID() == GenISAIntrinsic::GenISA_PredicatedLoad))
376382
{
377383
Instruction* oldLoad = LI ? cast<Instruction>(LI) : cast<Instruction>(II);
@@ -394,7 +400,7 @@ bool VectorProcess::reLayoutLoadStore(Instruction* Inst)
394400
GenISAIntrinsic::GenISA_PredicatedLoad,
395401
types);
396402
load = Builder.CreateCall4(F, newPtr, II->getOperand(1), II->getOperand(2),
397-
ProcessMergeValue(Inst->getParent(), II->getOperand(3), newVTy));
403+
ProcessMergeValue(Inst, II->getOperand(3), newVTy, int_eTy, new_intTy));
398404
}
399405
load->copyMetadata(*oldLoad);
400406

@@ -407,8 +413,6 @@ bool VectorProcess::reLayoutLoadStore(Instruction* Inst)
407413
// the original vector type with ptr element type replaced
408414
// with int-element type.
409415
// second, IntToPtr cast to the original vector type.
410-
Type* int_eTy = Type::getIntNTy(*m_C, eTyBits);
411-
Type* new_intTy = VTy ? FixedVectorType::get(int_eTy, nelts) : int_eTy;
412416
V = Builder.CreateBitCast(V, new_intTy);
413417
if (VTy)
414418
{
@@ -723,28 +727,53 @@ bool VectorProcess::runOnFunction(Function& F)
723727
return changed;
724728
}
725729

726-
Value* VectorProcess::ProcessMergeValue(BasicBlock *BB, Value* V, Type* NewTy) const
730+
Value* VectorProcess::ProcessMergeValue(Instruction *Inst, Value* V, Type* NewTy, Type* NewIntEType, Type* NewIntTy) const
727731
{
728732
// if V is a zero initializer, undef or poison value, we just need to create
729733
// corresponding value of NewTy.
730734
if (isa<ConstantAggregateZero>(V)) {
731-
if(IGCLLVM::FixedVectorType *VTy = dyn_cast<IGCLLVM::FixedVectorType>(NewTy))
732-
return ConstantAggregateZero::get(VTy);
735+
if(IGCLLVM::FixedVectorType *NewVTy = dyn_cast<IGCLLVM::FixedVectorType>(NewTy))
736+
return ConstantAggregateZero::get(NewVTy);
733737
else
734738
return Constant::getNullValue(NewTy);
735739
}
736740

741+
if (isa<PoisonValue>(V))
742+
return PoisonValue::get(NewTy);
743+
737744
if (isa<UndefValue>(V))
738745
return UndefValue::get(NewTy);
739746

740-
if (isa<PoisonValue>(V))
741-
return PoisonValue::get(NewTy);
747+
IRBuilder<> Builder(Inst);
748+
749+
Type *Ty = V->getType();
750+
IGCLLVM::FixedVectorType* const VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty);
751+
uint32_t nelts = VTy ? int_cast<uint32_t>(VTy->getNumElements()) : 1;
752+
Type* eTy = VTy ? VTy->getElementType() : Ty;
742753

743-
std::unique_ptr<IRBuilder<>> Builder = isa<Instruction>(V) ?
744-
std::make_unique<IRBuilder<>>(cast<Instruction>(V)) :
745-
std::make_unique<IRBuilder<>>(BB);
754+
if (eTy->isPointerTy())
755+
{
756+
// cannot bitcast ptr to int; First, PtrToInt cast
757+
// then bitcast int (scalar or vector) to the new type.
758+
if (VTy)
759+
{
760+
// need a vector ptrtoint, scalarize:
761+
auto* oldV = V;
762+
V = UndefValue::get(NewIntTy);
763+
for (unsigned i = 0; i < nelts; ++i)
764+
{
765+
auto* EE = Builder.CreateExtractElement(oldV, i);
766+
auto* PTI = Builder.CreatePtrToInt(EE, NewIntEType);
767+
V = Builder.CreateInsertElement(V, PTI, i);
768+
}
769+
}
770+
else
771+
{
772+
V = Builder.CreatePtrToInt(V, NewIntTy);
773+
}
774+
}
746775

747-
return Builder->CreateBitCast(V, NewTy);
776+
return Builder.CreateBitCast(V, NewTy);
748777
}
749778

750779
//

IGC/Compiler/Optimizer/PromoteToPredicatedMemoryAccess.cpp

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -47,12 +47,20 @@ namespace IGC {
4747

4848
bool PromoteToPredicatedMemoryAccess::runOnFunction(Function &F) {
4949
CodeGenContext* pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
50+
51+
bool isStatlessToBindlessEnabled = (pCtx->type == ShaderType::OPENCL_SHADER &&
52+
static_cast<OpenCLProgramContext*>(pCtx)->m_InternalOptions.PromoteStatelessToBindless);
5053
if (!pCtx->platform.hasLSC() ||
5154
!pCtx->platform.LSCEnabled() ||
52-
(pCtx->type == ShaderType::OPENCL_SHADER &&
53-
static_cast<OpenCLProgramContext*>(pCtx)->m_InternalOptions.PromoteStatelessToBindless) ||
54-
pCtx->useStatelessToStateful())
55+
isStatlessToBindlessEnabled ||
56+
pCtx->useStatelessToStateful()) {
57+
LLVM_DEBUG(dbgs() << "Skip promotion to predicated memory operations because one of conditions is false:\n"
58+
<< " - Platform has LSC: " << pCtx->platform.hasLSC() << "\n"
59+
<< " - LSC is enabled: " << pCtx->platform.LSCEnabled() << "\n"
60+
<< " - PromoteStatelessToBindless is disabled: " << !isStatlessToBindlessEnabled << "\n"
61+
<< " - useStatelessToStateful is disabled: " << !pCtx->useStatelessToStateful() << "\n");
5562
return false;
63+
}
5664

5765
SmallVector<std::pair<BranchInst *, bool>, 8> WorkList;
5866

@@ -103,10 +111,6 @@ void PromoteToPredicatedMemoryAccess::fixPhiNode(PHINode &Phi, BasicBlock &Prede
103111
bool PromoteToPredicatedMemoryAccess::trySingleBlockIfConv(Value &Cond, BasicBlock &BranchBB,
104112
BasicBlock &ConvBB, BasicBlock &SuccBB,
105113
bool Inverse) {
106-
// Reject the candidate if the condition is not an integer compare instruction.
107-
if (!isa<ICmpInst>(Cond))
108-
return false;
109-
110114
if (!ConvBB.hasNPredecessors(1))
111115
return false;
112116

IGC/Compiler/Optimizer/PromoteToPredicatedMemoryAccess.hpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,7 @@ namespace IGC
2727
// %17 = call <4 x float> @llvm.genx.GenISA.PredicatedLoad.v4f32.p1v4f32.v4f32(<4 x float> addrspace(1)* %bitc0, i64 16, i1 %pred, <4 x float> %mergeValue)
2828
// if found in specific pattern and then performs if-conversion.
2929
//
30-
// The pass looks for conditional branches that can be if-converted. The
31-
// condition must be an integer compare instruction. The only "hammock" form
30+
// The pass looks for conditional branches that can be if-converted. Only the "hammock" form
3231
// of the control flow is supported, i.e. the true block has a single
3332
// predecessor and the false block has two predecessors. The true block must
3433
// have a single successor that is the false block.
Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,56 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; REQUIRES: llvm-14-plus, regkeys
9+
;
10+
; RUN: igc_opt --opaque-pointers -platformbmg -igc-emit-visa %s -regkey DumpVISAASMToConsole | FileCheck %s
11+
; ------------------------------------------------
12+
; EmitVISAPass
13+
; ------------------------------------------------
14+
15+
; Verifies that predicated loads are emitted correctly for subDW loads
16+
17+
define spir_kernel void @test(ptr addrspace(1) align 1 %in, i32 %predicate) {
18+
entry:
19+
; calculated predicate
20+
%p = icmp slt i32 0, %predicate
21+
22+
; CHECK: .decl [[G_ALIAS0:.*]] v_type=G type=b num_elts=4 align=wordx32 alias=<[[GATHER0:.*]], 0>
23+
; CHECK: .decl [[G_ALIAS1:.*]] v_type=G type=b num_elts=4 align=wordx32 alias=<[[GATHER1:.*]], 0>
24+
25+
; copy merge value. do predicated load, copy result
26+
; CHECK: mov (M1_NM, 1) [[G_ALIAS0]](0,0)<0> 0x0:b
27+
; CHECK: (P1) lsc_load.ugm (M1_NM, 1) [[GATHER0]]:d8c32 flat[
28+
; CHECK: mov (M1_NM, 1) res0(0,0)<1> [[G_ALIAS0]](0,0)<0;1,0>
29+
%res0 = call i8 @llvm.genx.GenISA.PredicatedLoad.i8.p1.i8(ptr addrspace(1) %in, i64 1, i1 %p, i8 0)
30+
31+
; do predicated load, then do predicated copy of result to merge value which is used as dest.
32+
; CHECK: (P1) lsc_load.ugm (M1_NM, 1) [[GATHER1]]:d8c32 flat[
33+
; CHECK: (P1) mov (M1_NM, 1) mVi8(0,0)<1> [[G_ALIAS1]](0,0)<0;1,0>
34+
%mergeV = add i32 %predicate, 5
35+
%mVi8 = trunc i32 %mergeV to i8
36+
%res1 = call i8 @llvm.genx.GenISA.PredicatedLoad.i8.p1.i8(ptr addrspace(1) %in, i64 1, i1 %p, i8 %mVi8)
37+
38+
ret void
39+
}
40+
41+
declare i8 @llvm.genx.GenISA.PredicatedLoad.i8.p1.i8(ptr addrspace(1), i64, i1, i8)
42+
43+
!IGCMetadata = !{!0}
44+
!igc.functions = !{!3}
45+
46+
!0 = !{!"ModuleMD", !132}
47+
!3 = !{ptr @test, !4}
48+
!4 = !{!5}
49+
!5 = !{!"function_type", i32 0}
50+
!132 = !{!"FuncMD", !133, !134}
51+
!133 = !{!"FuncMDMap[0]", ptr @test}
52+
!134 = !{!"FuncMDValue[0]", !167}
53+
!167 = !{!"resAllocMD", !171}
54+
!171 = !{!"argAllocMDList", !172}
55+
!172 = !{!"argAllocMDListVec[0]", !173}
56+
!173 = !{!"type", i32 0}

0 commit comments

Comments
 (0)