@@ -50,6 +50,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
5050    return  MRI.getType (Reg) == LLT::scalar (32 );
5151  case  S64:
5252    return  MRI.getType (Reg) == LLT::scalar (64 );
53+   case  P0:
54+     return  MRI.getType (Reg) == LLT::pointer (0 , 64 );
5355  case  P1:
5456    return  MRI.getType (Reg) == LLT::pointer (1 , 64 );
5557  case  P3:
@@ -58,6 +60,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
5860    return  MRI.getType (Reg) == LLT::pointer (4 , 64 );
5961  case  P5:
6062    return  MRI.getType (Reg) == LLT::pointer (5 , 32 );
63+   case  V4S32:
64+     return  MRI.getType (Reg) == LLT::fixed_vector (4 , 32 );
6165  case  B32:
6266    return  MRI.getType (Reg).getSizeInBits () == 32 ;
6367  case  B64:
@@ -78,6 +82,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
7882    return  MRI.getType (Reg) == LLT::scalar (32 ) && MUI.isUniform (Reg);
7983  case  UniS64:
8084    return  MRI.getType (Reg) == LLT::scalar (64 ) && MUI.isUniform (Reg);
85+   case  UniP0:
86+     return  MRI.getType (Reg) == LLT::pointer (0 , 64 ) && MUI.isUniform (Reg);
8187  case  UniP1:
8288    return  MRI.getType (Reg) == LLT::pointer (1 , 64 ) && MUI.isUniform (Reg);
8389  case  UniP3:
@@ -104,6 +110,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
104110    return  MRI.getType (Reg) == LLT::scalar (32 ) && MUI.isDivergent (Reg);
105111  case  DivS64:
106112    return  MRI.getType (Reg) == LLT::scalar (64 ) && MUI.isDivergent (Reg);
113+   case  DivP0:
114+     return  MRI.getType (Reg) == LLT::pointer (0 , 64 ) && MUI.isDivergent (Reg);
107115  case  DivP1:
108116    return  MRI.getType (Reg) == LLT::pointer (1 , 64 ) && MUI.isDivergent (Reg);
109117  case  DivP3:
@@ -433,16 +441,21 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
433441  addRulesForGOpcs ({G_XOR, G_OR, G_AND}, StandardB)
434442      .Any ({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
435443      .Any ({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
444+       .Div (B32, {{VgprB32}, {VgprB32, VgprB32}})
445+       .Uni (B64, {{SgprB64}, {SgprB64, SgprB64}})
436446      .Div (B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
437447
438448  addRulesForGOpcs ({G_SHL}, Standard)
449+       .Div (S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
439450      .Uni (S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
440451      .Div (S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
441452
442453  //  Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
443454  //  and G_FREEZE here; the rest are trivially regbankselected earlier.
455+   addRulesForGOpcs ({G_IMPLICIT_DEF}).Any ({{UniS1}, {{Sgpr32Trunc}, {}}});
444456  addRulesForGOpcs ({G_CONSTANT})
445457      .Any ({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
458+   addRulesForGOpcs ({G_FREEZE}).Any ({{DivS1}, {{Vcc}, {Vcc}}});
446459
447460  addRulesForGOpcs ({G_ICMP})
448461      .Any ({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
@@ -473,6 +486,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
473486
474487  addRulesForGOpcs ({G_ZEXT, G_SEXT})
475488      .Any ({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
489+       .Any ({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
476490      .Any ({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
477491      .Any ({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}});
478492
@@ -527,9 +541,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
527541
528542  //  clang-format off
529543  addRulesForGOpcs ({G_LOAD})
544+       .Any ({{DivB32, DivP0}, {{VgprB32}, {VgprP0}}})
545+ 
530546      .Any ({{DivB32, DivP1}, {{VgprB32}, {VgprP1}}})
531547      .Any ({{{UniB256, UniP1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
532548      .Any ({{{UniB512, UniP1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})
549+       .Any ({{{UniB32, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}})
533550      .Any ({{{UniB256, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP1}, SplitLoad}})
534551      .Any ({{{UniB512, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP1}, SplitLoad}})
535552
@@ -558,15 +575,26 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
558575  //  clang-format on
559576
560577  addRulesForGOpcs ({G_AMDGPU_BUFFER_LOAD}, Vector)
578+       .Div (S32, {{Vgpr32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
579+       .Uni (S32, {{UniInVgprS32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
561580      .Div (V4S32, {{VgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
562581      .Uni (V4S32, {{UniInVgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}});
563582
564583  addRulesForGOpcs ({G_STORE})
584+       .Any ({{S32, P0}, {{}, {Vgpr32, VgprP0}}})
565585      .Any ({{S32, P1}, {{}, {Vgpr32, VgprP1}}})
566586      .Any ({{S64, P1}, {{}, {Vgpr64, VgprP1}}})
567587      .Any ({{V4S32, P1}, {{}, {VgprV4S32, VgprP1}}});
568588
569-   addRulesForGOpcs ({G_PTR_ADD}).Any ({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}});
589+   addRulesForGOpcs ({G_AMDGPU_BUFFER_STORE})
590+       .Any ({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});
591+ 
592+   addRulesForGOpcs ({G_PTR_ADD})
593+       .Any ({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
594+       .Any ({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
595+       .Any ({{DivP0}, {{VgprP0}, {VgprP0, Vgpr64}}});
596+ 
597+   addRulesForGOpcs ({G_INTTOPTR}).Any ({{UniP4}, {{SgprP4}, {Sgpr64}}});
570598
571599  addRulesForGOpcs ({G_ABS}, Standard).Uni (S16, {{Sgpr32Trunc}, {Sgpr32SExt}});
572600
@@ -582,15 +610,24 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
582610      .Any ({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
583611
584612  addRulesForGOpcs ({G_UITOFP})
613+       .Any ({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
585614      .Any ({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
586615      .Any ({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
587616
588617  using  namespace  Intrinsic ; 
589618
619+   addRulesForIOpcs ({amdgcn_s_getpc}).Any ({{UniS64, _}, {{Sgpr64}, {None}}});
620+ 
590621  //  This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
591622  addRulesForIOpcs ({amdgcn_end_cf}).Any ({{_, S32}, {{}, {None, Sgpr32}}});
592623
593624  addRulesForIOpcs ({amdgcn_if_break}, Standard)
594625      .Uni (S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
595626
627+   addRulesForIOpcs ({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
628+       .Div (S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
629+ 
630+   addRulesForIOpcs ({amdgcn_readfirstlane})
631+       .Any ({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}});
632+ 
596633} //  end initialize rules
0 commit comments