Skip to content

Commit 7f18b35

Browse files
committed
Fix rotr regression by adding lowerROTR() on the legalizer codepath
1 parent 8913345 commit 7f18b35

File tree

3 files changed

+43
-23
lines changed

3 files changed

+43
-23
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5944,6 +5944,35 @@ SDValue SITargetLowering::splitUnaryVectorOp(SDValue Op,
59445944
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi);
59455945
}
59465946

5947+
// Enable lowering of ROTR for vxi32 types.
5948+
SDValue SITargetLowering::lowerROTR(SDValue Op,
5949+
SelectionDAG &DAG) const {
5950+
unsigned Opc = Op.getOpcode();
5951+
EVT VT = Op.getValueType();
5952+
assert(Opc == ISD::ROTR && "Expected ROTR Opcode for lowerROTR.");
5953+
5954+
assert((VT == MVT::v2i32 || VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) &&
5955+
"Unexpected ValueType.");
5956+
5957+
unsigned VectorSize = VT.getVectorNumElements();
5958+
EVT ElementType = VT.getVectorElementType();
5959+
SDLoc SL(Op);
5960+
auto LHS = Op->getOperand(0);
5961+
auto RHS = Op->getOperand(1);
5962+
5963+
SmallVector<SDValue, 4> RotateTargets;
5964+
SmallVector<SDValue, 4> RotateSizes;
5965+
SmallVector<SDValue, 4> Ops;
5966+
5967+
DAG.ExtractVectorElements(LHS, RotateTargets, 0,VectorSize, ElementType);
5968+
DAG.ExtractVectorElements(RHS, RotateSizes, 0,VectorSize, ElementType);
5969+
5970+
for(unsigned i=0; i < VectorSize; i++)
5971+
Ops.push_back(DAG.getNode(ISD::ROTR, SL, ElementType, RotateTargets[i], RotateSizes[i], Op->getFlags()));
5972+
5973+
return DAG.getBuildVector(VT, SL, Ops);
5974+
}
5975+
59475976
// Work around LegalizeDAG doing the wrong thing and fully scalarizing if the
59485977
// wider vector type is legal.
59495978
SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op,
@@ -6130,6 +6159,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
61306159
return lowerGET_FPENV(Op, DAG);
61316160
case ISD::SET_FPENV:
61326161
return lowerSET_FPENV(Op, DAG);
6162+
case ISD::ROTR:
6163+
return lowerROTR(Op, DAG);
61336164
}
61346165
return SDValue();
61356166
}

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
440440
SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
441441
SDValue lowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const;
442442
SDValue lowerSET_FPENV(SDValue Op, SelectionDAG &DAG) const;
443+
SDValue lowerROTR(SDValue Op, SelectionDAG &DAG) const;
443444

444445
Register getRegisterByName(const char* RegName, LLT VT,
445446
const MachineFunction &MF) const override;

llvm/test/CodeGen/AMDGPU/rotr.ll

Lines changed: 11 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -121,29 +121,17 @@ define amdgpu_kernel void @rotr_v2i32(ptr addrspace(1) %in, <2 x i32> %x, <2 x i
121121
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
122122
; GFX10-NEXT: s_endpgm
123123
;
124-
; GFX11-TRUE16-LABEL: rotr_v2i32:
125-
; GFX11-TRUE16: ; %bb.0: ; %entry
126-
; GFX11-TRUE16-NEXT: s_clause 0x1
127-
; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c
128-
; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
129-
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0
130-
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
131-
; GFX11-TRUE16-NEXT: v_alignbit_b32 v1, s1, s1, s2
132-
; GFX11-TRUE16-NEXT: v_alignbit_b32 v0, s0, s0, s2
133-
; GFX11-TRUE16-NEXT: global_store_b64 v2, v[0:1], s[4:5]
134-
; GFX11-TRUE16-NEXT: s_endpgm
135-
;
136-
; GFX11-FAKE16-LABEL: rotr_v2i32:
137-
; GFX11-FAKE16: ; %bb.0: ; %entry
138-
; GFX11-FAKE16-NEXT: s_clause 0x1
139-
; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c
140-
; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
141-
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0
142-
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
143-
; GFX11-FAKE16-NEXT: v_alignbit_b32 v1, s1, s1, s3
144-
; GFX11-FAKE16-NEXT: v_alignbit_b32 v0, s0, s0, s2
145-
; GFX11-FAKE16-NEXT: global_store_b64 v2, v[0:1], s[4:5]
146-
; GFX11-FAKE16-NEXT: s_endpgm
124+
; GFX11-LABEL: rotr_v2i32:
125+
; GFX11: ; %bb.0: ; %entry
126+
; GFX11-NEXT: s_clause 0x1
127+
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c
128+
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
129+
; GFX11-NEXT: v_mov_b32_e32 v2, 0
130+
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
131+
; GFX11-NEXT: v_alignbit_b32 v1, s1, s1, s3
132+
; GFX11-NEXT: v_alignbit_b32 v0, s0, s0, s2
133+
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
134+
; GFX11-NEXT: s_endpgm
147135
entry:
148136
%tmp0 = sub <2 x i32> <i32 32, i32 32>, %y
149137
%tmp1 = shl <2 x i32> %x, %tmp0

0 commit comments

Comments
 (0)