@@ -50,6 +50,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
5050 return MRI.getType (Reg) == LLT::scalar (32 );
5151 case S64:
5252 return MRI.getType (Reg) == LLT::scalar (64 );
53+ case P0:
54+ return MRI.getType (Reg) == LLT::pointer (0 , 64 );
5355 case P1:
5456 return MRI.getType (Reg) == LLT::pointer (1 , 64 );
5557 case P3:
@@ -58,6 +60,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
5860 return MRI.getType (Reg) == LLT::pointer (4 , 64 );
5961 case P5:
6062 return MRI.getType (Reg) == LLT::pointer (5 , 32 );
63+ case V4S32:
64+ return MRI.getType (Reg) == LLT::fixed_vector (4 , 32 );
6165 case B32:
6266 return MRI.getType (Reg).getSizeInBits () == 32 ;
6367 case B64:
@@ -315,13 +319,15 @@ RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
315319 Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
316320 unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID ();
317321 if (!IRulesAlias.contains (IntrID)) {
322+ MI.dump ();
318323 LLVM_DEBUG (dbgs () << " MI: " ; MI.dump (););
319324 llvm_unreachable (" No rules defined for intrinsic opcode" );
320325 }
321326 return IRules.at (IRulesAlias.at (IntrID));
322327 }
323328
324329 if (!GRulesAlias.contains (Opc)) {
330+ MI.dump ();
325331 LLVM_DEBUG (dbgs () << " MI: " ; MI.dump (););
326332 llvm_unreachable (" No rules defined for generic opcode" );
327333 }
@@ -431,16 +437,21 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
431437 addRulesForGOpcs ({G_XOR, G_OR, G_AND}, StandardB)
432438 .Any ({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
433439 .Any ({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
440+ .Div (B32, {{VgprB32}, {VgprB32, VgprB32}})
441+ .Uni (B64, {{SgprB64}, {SgprB64, SgprB64}})
434442 .Div (B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
435443
436444 addRulesForGOpcs ({G_SHL}, Standard)
445+ .Div (S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
437446 .Uni (S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
438447 .Div (S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
439448
440449 // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
441450 // and G_FREEZE here; the rest is trivially regbankselected earlier.
451+ addRulesForGOpcs ({G_IMPLICIT_DEF}).Any ({{UniS1}, {{Sgpr32Trunc}, {}}});
442452 addRulesForGOpcs ({G_CONSTANT})
443453 .Any ({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
454+ addRulesForGOpcs ({G_FREEZE}).Any ({{DivS1}, {{Vcc}, {Vcc}}});
444455
445456 addRulesForGOpcs ({G_ICMP})
446457 .Any ({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
@@ -471,6 +482,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
471482
472483 addRulesForGOpcs ({G_ZEXT, G_SEXT})
473484 .Any ({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
485+ .Any ({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
474486 .Any ({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
475487 .Any ({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}});
476488
@@ -528,6 +540,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
528540 .Any ({{DivB32, DivP1}, {{VgprB32}, {VgprP1}}})
529541 .Any ({{{UniB256, UniP1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
530542 .Any ({{{UniB512, UniP1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})
543+ .Any ({{{UniB32, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}})
531544 .Any ({{{UniB256, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP1}, SplitLoad}})
532545 .Any ({{{UniB512, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP1}, SplitLoad}})
533546
@@ -556,15 +569,25 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
556569 // clang-format on
557570
558571 addRulesForGOpcs ({G_AMDGPU_BUFFER_LOAD}, Vector)
572+ .Div (S32, {{Vgpr32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
573+ .Uni (S32, {{UniInVgprS32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
559574 .Div (V4S32, {{VgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
560575 .Uni (V4S32, {{UniInVgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}});
561576
562577 addRulesForGOpcs ({G_STORE})
578+ .Any ({{S32, P0}, {{}, {Vgpr32, VgprP0}}})
563579 .Any ({{S32, P1}, {{}, {Vgpr32, VgprP1}}})
564580 .Any ({{S64, P1}, {{}, {Vgpr64, VgprP1}}})
565581 .Any ({{V4S32, P1}, {{}, {VgprV4S32, VgprP1}}});
566582
567- addRulesForGOpcs ({G_PTR_ADD}).Any ({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}});
583+ addRulesForGOpcs ({G_AMDGPU_BUFFER_STORE})
584+ .Any ({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});
585+
586+ addRulesForGOpcs ({G_PTR_ADD})
587+ .Any ({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
588+ .Any ({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}});
589+
590+ addRulesForGOpcs ({G_INTTOPTR}).Any ({{UniP4}, {{SgprP4}, {Sgpr64}}});
568591
569592 addRulesForGOpcs ({G_ABS}, Standard).Uni (S16, {{Sgpr32Trunc}, {Sgpr32SExt}});
570593
@@ -585,10 +608,18 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
585608
586609 using namespace Intrinsic ;
587610
611+ addRulesForIOpcs ({amdgcn_s_getpc}).Any ({{UniS64, _}, {{Sgpr64}, {None}}});
612+
588613 // This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
589614 addRulesForIOpcs ({amdgcn_end_cf}).Any ({{_, S32}, {{}, {None, Sgpr32}}});
590615
591616 addRulesForIOpcs ({amdgcn_if_break}, Standard)
592617 .Uni (S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
593618
619+ addRulesForIOpcs ({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
620+ .Div (S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
621+
622+ addRulesForIOpcs ({amdgcn_readfirstlane})
623+ .Any ({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}});
624+
594625} // end initialize rules
0 commit comments