Skip to content

Commit e55b653

Browse files
weiyu-chen authored and sys_zuul committed
Emulate pred.any and pred.all on platforms that do not support predCtrl group size. For now emulation is limited to NoMask instructions.
Change-Id: Id8f879138d1df10853352b1d306978fa83efd83c
1 parent f6088c1 commit e55b653

File tree

9 files changed

+131
-65
lines changed

9 files changed

+131
-65
lines changed

visa/BuildIR.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -684,8 +684,7 @@ class IR_Builder {
684684
#endif
685685
}
686686

687-
688-
687+
// Maps a vISA predicate control (none/any/all) and a predicate group size to
// the corresponding G4_Predicate_Control value.
G4_Predicate_Control vISAPredicateToG4Predicate(VISA_PREDICATE_CONTROL control, int size);
689688

690689
G4_FCALL* getFcallInfo(G4_INST* inst) {
691690
std::map<G4_INST *, G4_FCALL *>::iterator it;

visa/BuildIRImpl.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1901,3 +1901,66 @@ void IR_Builder::initBuiltinSLMSpillAddr(int perThreadSLMSize)
19011901
}
19021902
entryBB->insert(insertIter, instBuffer.begin(), instBuffer.end());
19031903
}
1904+
1905+
// Translates a vISA predicate control into a G4_Predicate_Control.
//
// control: PRED_CTRL_NON, PRED_CTRL_ANY or PRED_CTRL_ALL.
// size:    predicate group size; only consulted for ANY/ALL when
//          predCtrlHasWidth() is true.
//
// Returns:
//   - PRED_DEFAULT for PRED_CTRL_NON.
//   - PRED_ANY_WHOLE / PRED_ALL_WHOLE for ANY/ALL when predCtrlHasWidth() is
//     false, regardless of size.
//   - Otherwise PRED_DEFAULT for size 1 and PRED_ANY<n>H / PRED_ALL<n>H for
//     size n in {2, 4, 8, 16, 32}.
// Any other control or size trips MUST_BE_TRUE; PRED_DEFAULT is the value
// returned if execution continues past that check.
G4_Predicate_Control IR_Builder::vISAPredicateToG4Predicate(VISA_PREDICATE_CONTROL control, int size)
1906+
{
1907+
switch (control)
1908+
{
1909+
case PRED_CTRL_NON:
1910+
return PRED_DEFAULT;
1911+
case PRED_CTRL_ANY:
1912+
{
1913+
// No predCtrl group size on this platform: use the whole-flag form.
// NOTE(review): this check runs before the size switch, so size 1 also
// yields PRED_ANY_WHOLE here rather than PRED_DEFAULT -- confirm intended.
if (!predCtrlHasWidth())
1914+
{
1915+
return PRED_ANY_WHOLE;
1916+
}
1917+
switch (size)
1918+
{
1919+
case 1:
1920+
return PRED_DEFAULT;
1921+
case 2:
1922+
return PRED_ANY2H;
1923+
case 4:
1924+
return PRED_ANY4H;
1925+
case 8:
1926+
return PRED_ANY8H;
1927+
case 16:
1928+
return PRED_ANY16H;
1929+
case 32:
1930+
return PRED_ANY32H;
1931+
default:
1932+
MUST_BE_TRUE(0, "Invalid predicate control group size.");
1933+
return PRED_DEFAULT;
1934+
}
1935+
}
1936+
case PRED_CTRL_ALL:
1937+
{
1938+
// Same structure as the ANY case, with the ALL variants.
if (!predCtrlHasWidth())
1939+
{
1940+
return PRED_ALL_WHOLE;
1941+
}
1942+
switch (size)
1943+
{
1944+
case 1:
1945+
return PRED_DEFAULT;
1946+
case 2:
1947+
return PRED_ALL2H;
1948+
case 4:
1949+
return PRED_ALL4H;
1950+
case 8:
1951+
return PRED_ALL8H;
1952+
case 16:
1953+
return PRED_ALL16H;
1954+
case 32:
1955+
return PRED_ALL32H;
1956+
default:
1957+
MUST_BE_TRUE(0, "Invalid predicate control group size.");
1958+
return PRED_DEFAULT;
1959+
}
1960+
}
1961+
default:
1962+
MUST_BE_TRUE(0, "Invalid predicate control.");
1963+
return PRED_DEFAULT;
1964+
}
1965+
}
1966+

visa/Common_ISA_util.cpp

Lines changed: 0 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -749,65 +749,6 @@ unsigned Round_Up_Pow2(unsigned n)
749749
return i;
750750
}
751751

752-
// Free-function form of the vISA -> G4 predicate control mapping.
//
// Returns PRED_DEFAULT for PRED_CTRL_NON. For PRED_CTRL_ANY / PRED_CTRL_ALL it
// selects purely on size: 1 -> PRED_DEFAULT, n in {2, 4, 8, 16, 32} ->
// PRED_ANY<n>H / PRED_ALL<n>H. Any other control or size trips MUST_BE_TRUE;
// PRED_DEFAULT is the value returned if execution continues past that check.
G4_Predicate_Control vISAPredicateToG4Predicate(VISA_PREDICATE_CONTROL control, int size)
753-
{
754-
switch(control)
755-
{
756-
case PRED_CTRL_NON:
757-
return PRED_DEFAULT;
758-
case PRED_CTRL_ANY:
759-
{
760-
switch(size)
761-
{
762-
case 1:
763-
return PRED_DEFAULT;
764-
case 2:
765-
return PRED_ANY2H;
766-
case 4:
767-
return PRED_ANY4H;
768-
case 8:
769-
return PRED_ANY8H;
770-
case 16:
771-
return PRED_ANY16H;
772-
case 32:
773-
return PRED_ANY32H;
774-
default:
775-
MUST_BE_TRUE( 0, "Invalid predicate control group size." );
776-
return PRED_DEFAULT;
777-
}
778-
}
779-
case PRED_CTRL_ALL:
780-
{
781-
switch(size)
782-
{
783-
case 1:
784-
return PRED_DEFAULT;
785-
case 2:
786-
return PRED_ALL2H;
787-
case 4:
788-
return PRED_ALL4H;
789-
case 8:
790-
return PRED_ALL8H;
791-
case 16:
792-
return PRED_ALL16H;
793-
case 32:
794-
return PRED_ALL32H;
795-
default:
796-
MUST_BE_TRUE( 0, "Invalid predicate control group size." );
797-
return PRED_DEFAULT;
798-
}
799-
}
800-
default:
801-
MUST_BE_TRUE( 0, "Invalid predicate control." );
802-
return PRED_DEFAULT;
803-
}
804-
}
805-
// Extracts the predicate control field from a packed predicate value and
// converts it with vISAPredicateToG4Predicate(control, size).
G4_Predicate_Control Get_Pred_Ctrl(unsigned short predicate, int size)
806-
{
807-
// The control field is the two bits selected by mask 0x6000 (bits 13-14).
VISA_PREDICATE_CONTROL control = (VISA_PREDICATE_CONTROL)((predicate & 0x6000) >> 13);
808-
return vISAPredicateToG4Predicate(control, size);
809-
}
810-
811752
G4_opcode Get_Pseudo_Opcode(ISA_Opcode op)
812753
{
813754
switch( op ){

visa/Common_ISA_util.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
6363
unsigned short Create_CISA_Region(unsigned short vstride, unsigned short width, unsigned short hstride);
6464
unsigned Round_Up_Pow2(unsigned n);
6565
unsigned Round_Down_Pow2(unsigned n);
66-
G4_Predicate_Control Get_Pred_Ctrl( unsigned short predicate, int size);
67-
G4_Predicate_Control vISAPredicateToG4Predicate( VISA_PREDICATE_CONTROL control, int size);
6866
G4_opcode Get_Pseudo_Opcode(ISA_Opcode op);
6967
Common_VISA_EMask_Ctrl Get_Next_EMask(Common_VISA_EMask_Ctrl currEMask, int execSize);
7068
unsigned int Get_Gen4_Emask( Common_VISA_EMask_Ctrl cisa_emask, int exec_size );

visa/Gen4_IR.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3203,7 +3203,9 @@ typedef enum
32033203
PRED_ALL16H,
32043204
PRED_ALL32H,
32053205
PRED_ANYV,
3206-
PRED_ALLV
3206+
PRED_ALLV,
3207+
PRED_ANY_WHOLE, // any of the flag-bits
3208+
PRED_ALL_WHOLE // all of the flag-bits
32073209
} G4_Predicate_Control;
32083210

32093211
typedef enum

visa/HWCapsOpen.inc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,4 +545,9 @@
545545
{
546546
return getGenxPlatform() >= GENX_TGLLP;
547547
}
548+
549+
// Capability query: whether predicate control carries a group size.
// This definition unconditionally reports true.
bool predCtrlHasWidth() const
550+
{
551+
return true;
552+
}
548553
// end HW capabilities

visa/HWConformity.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5778,6 +5778,8 @@ void HWConformity::conformBB(G4_BB* bb)
57785778

57795779
fixSelCsel(i, bb);
57805780

5781+
fixPredCtrl(i, bb);
5782+
57815783
if (inst->getExecSize() > builder.getNativeExecSize())
57825784
{
57835785
if (inst->opcode() == G4_math &&
@@ -7540,3 +7542,57 @@ bool HWConformity::fixIntToHFMove(G4_BB* bb)
75407542
return changed;
75417543
}
75427544

7545+
// Emulates a PRED_ANY_WHOLE / PRED_ALL_WHOLE predicate on the instruction at
// `it` when its flag declaration has more elements than the instruction's
// execution size.
//
// it: iterator to the instruction to fix; new instructions are inserted into
//     bb immediately before it.
// bb: basic block containing the instruction.
//
// When the rewrite applies, a cmp (preceded by an `and` if the flag has fewer
// than 16 elements) reduces the original flag into a fresh temp flag, and the
// instruction's predicate is replaced by one on that temp flag with the
// original predicate state. Instructions with any other predicate control, or
// whose flag is not wider than the exec size, are left untouched.
//
// Limitations are enforced only by assert: the instruction must have no
// conditional modifier, and must be write-enable or outside SIMD control flow.
void HWConformity::fixPredCtrl(INST_LIST_ITER it, G4_BB* bb)
7546+
{
7547+
G4_INST* inst = *it;
7548+
G4_Predicate* pred = inst->getPredicate();
7549+
if (pred && (pred->getControl() == PRED_ANY_WHOLE || pred->getControl() == PRED_ALL_WHOLE))
7550+
{
7551+
// we need WA if pred's size is greater than inst's exec size
7552+
// and the platform does not support predctrl group size (indicated by the fact we
7553+
// have PRED_ANY_WHOLE and PRED_ALL_WHOLE)
7554+
// The case where pred size is less than inst's exec size is already undefined
7555+
// even with predCtrl group size..
7556+
G4_Declare* flagDcl = pred->getTopDcl();
7557+
if (flagDcl->getNumberFlagElements() > inst->getExecSize())
7558+
{
7559+
// convert
7560+
// (f0.any32h) sel (1) ...
7561+
// into
7562+
// cmp (1) [ne] f1 f0 0
7563+
// (f1) sel (1) ...
7564+
// and
7565+
// (f0.all32h) sel (1) ...
7566+
// into
7567+
// cmp (1) [e] f1 f0 0xFFFFFFFF
7568+
// (f1) sel (1) ...
7569+
// if f0 happens to be < 16 elements we have to clear upper bits as well in case it has garbage values
7570+
assert(!inst->getCondMod() && "currently don't handle an instruction with conditional modifier");
7571+
assert((inst->isWriteEnableInst() || !bb->isInSimdFlow()) && "don't handle instruction in SIMD CF for now");
7572+
G4_Declare* tmpFlag = builder.createTempFlag(1);
7573+
// The flag is read as a UD when it has 32 elements, otherwise as a UW.
G4_Type flagType = flagDcl->getNumberFlagElements() == 32 ? Type_UD : Type_UW;
7574+
// One set bit per flag element; the 64-bit shift keeps the 32-element case well-defined.
uint32_t allOneMask = (uint32_t) ((1ULL << flagDcl->getNumberFlagElements()) - 1);
7575+
G4_Declare* cmpSrc0Flag = flagDcl;
7576+
if (flagDcl->getNumberFlagElements() < 16)
7577+
{
7578+
// clear the upper bits of the flag (everything above its declared elements);
// the masked copy lives in tmpFlag, which then becomes the cmp source
7579+
auto andInst = builder.createInst(nullptr, G4_and, nullptr, false, 1, builder.Create_Dst_Opnd_From_Dcl(tmpFlag, 1),
7580+
builder.Create_Src_Opnd_From_Dcl(flagDcl, builder.getRegionScalar()),
7581+
builder.createImm(allOneMask, Type_UW), InstOpt_WriteEnable);
7582+
bb->insert(it, andInst);
7583+
cmpSrc0Flag = tmpFlag;
7584+
}
7585+
// ANY: flag != 0; ALL: flag == allOneMask. The cmp result is written to tmpFlag.
G4_CondMod* condMod = builder.createCondMod(pred->getControl() == PRED_ANY_WHOLE ? Mod_ne : Mod_e,
7586+
tmpFlag->getRegVar(), 0);
7587+
7588+
G4_Imm* immVal = builder.createImm(pred->getControl() == PRED_ANY_WHOLE ? 0 : allOneMask, flagType);
7589+
// cmp needs to be as wide as the original inst but is uniform and NoMask otherwise
7590+
auto cmpInst = builder.createInst(nullptr, G4_cmp, condMod, false, inst->getExecSize(), builder.createNullDst(flagType),
7591+
builder.createSrcRegRegion(Mod_src_undef, Direct, cmpSrc0Flag->getRegVar(), 0, 0, builder.getRegionScalar(), flagType),
7592+
immVal, InstOpt_WriteEnable);
7593+
bb->insert(it, cmpInst);
7594+
// Re-predicate the original instruction on the temp flag, keeping the original predicate state.
inst->setPredicate(builder.createPredicate(pred->getState(), tmpFlag->getRegVar(), 0));
7595+
}
7596+
}
7597+
}
7598+

visa/HWConformity.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ namespace vISA
204204

205205
void fixVxHFloat64b(INST_LIST_ITER it, G4_BB* bb);
206206

207+
// Rewrites PRED_ANY_WHOLE / PRED_ALL_WHOLE predicates whose flag has more
// elements than the instruction's execution size.
void fixPredCtrl(INST_LIST_ITER it, G4_BB* bb);
208+
207209
public:
208210
HWConformity(IR_Builder& b, G4_Kernel &k, vISA::Mem_Manager& m) :
209211
builder(b), kernel(k), mem(m)

visa/VISAKernelImpl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2174,7 +2174,7 @@ int VISAKernelImpl::CreateVISAPredicateOperand(VISA_PredOpnd *& cisa_opnd, VISA_
21742174
//with bool size in bytes was incorrect, now that it's correct it returns "correct"
21752175
//number of elements. Except it thinks each element is a two bytes.
21762176
//we want each element in as a boolean.
2177-
G4_Predicate_Control predCtrl = vISAPredicateToG4Predicate( (VISA_PREDICATE_CONTROL)cntrl, dcl->getNumberFlagElements());
2177+
G4_Predicate_Control predCtrl = m_builder->vISAPredicateToG4Predicate(cntrl, dcl->getNumberFlagElements());
21782178

21792179
cisa_opnd->g4opnd = m_builder->createPredicate(
21802180
(state == PredState_INVERSE) ? PredState_Minus : PredState_Plus,

0 commit comments

Comments
 (0)