-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU][SDAG] Legalise v2i32 or/xor/and instructions to make use of 64-bit wide instructions #140694
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[AMDGPU][SDAG] Legalise v2i32 or/xor/and instructions to make use of 64-bit wide instructions #140694
Changes from all commits
f0c57fd
1a96d4b
afbe7cc
d547305
fa2fa04
601beec
50ecdff
a5ba93d
d759dda
bfb37e4
55a643d
12f29b7
b3e6e22
eb851bf
dd52635
d4a9fb6
3a2870e
71aa858
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -438,6 +438,14 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |
setOperationAction(ISD::VECTOR_SHUFFLE, {MVT::v2i32, MVT::v2f32}, Legal); | ||
} | ||
|
||
setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, MVT::v2i32, Legal); | ||
// Prevent SELECT v2i32 from being implemented with the above bitwise ops and | ||
// instead lower to cndmask in SITargetLowering::LowerSELECT(). | ||
setOperationAction(ISD::SELECT, MVT::v2i32, Custom); | ||
// Enable MatchRotate to produce ISD::ROTR, which is later transformed to | ||
// alignbit. | ||
setOperationAction(ISD::ROTR, MVT::v2i32, Custom); | ||
|
||
setOperationAction(ISD::BUILD_VECTOR, {MVT::v4f16, MVT::v4i16, MVT::v4bf16}, | ||
Custom); | ||
|
||
|
@@ -6041,6 +6049,20 @@ SDValue SITargetLowering::splitUnaryVectorOp(SDValue Op, | |
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi); | ||
} | ||
|
||
// Enable lowering of ROTR for vxi32 types. This is a workaround for a | ||
// regression whereby extra unnecessary instructions were added to codegen | ||
// for rotr operations, casued by legalising v2i32 or. This resulted in extra | ||
// instructions to extract the result from the vector. | ||
SDValue SITargetLowering::lowerROTR(SDValue Op, SelectionDAG &DAG) const { | ||
[[maybe_unused]] EVT VT = Op.getValueType(); | ||
|
||
assert((VT == MVT::v2i32 || VT == MVT::v4i32 || VT == MVT::v8i32 || | ||
VT == MVT::v16i32) && | ||
"Unexpected ValueType."); | ||
|
||
return DAG.UnrollVectorOp(Op.getNode()); | ||
} | ||
|
||
// Work around LegalizeDAG doing the wrong thing and fully scalarizing if the | ||
// wider vector type is legal. | ||
SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op, | ||
|
@@ -6232,6 +6254,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { | |
return lowerGET_FPENV(Op, DAG); | ||
case ISD::SET_FPENV: | ||
return lowerSET_FPENV(Op, DAG); | ||
case ISD::ROTR: | ||
return lowerROTR(Op, DAG); | ||
} | ||
return SDValue(); | ||
} | ||
|
@@ -13136,6 +13160,47 @@ SDValue SITargetLowering::performOrCombine(SDNode *N, | |
} | ||
} | ||
|
||
// Detect identity v2i32 OR and replace with identity source node. | ||
// Specifically an Or that has operands constructed from the same source node | ||
// via extract_vector_elt and build_vector. I.E. | ||
// v2i32 or( | ||
// v2i32 build_vector( | ||
// i32 extract_elt(%IdentitySrc, 0), | ||
// i32 0 | ||
// ), | ||
// v2i32 build_vector( | ||
// i32 0, | ||
// i32 extract_elt(%IdentitySrc, 1) | ||
// ) ) | ||
// => | ||
// v2i32 %IdentitySrc | ||
|
||
if (VT == MVT::v2i32 && LHS->getOpcode() == ISD::BUILD_VECTOR && | ||
RHS->getOpcode() == ISD::BUILD_VECTOR) { | ||
|
||
ConstantSDNode *LC = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); | ||
ConstantSDNode *RC = dyn_cast<ConstantSDNode>(RHS->getOperand(0)); | ||
|
||
// Test for and normalise build vectors. | ||
if (LC && RC && LC->getZExtValue() == 0 && RC->getZExtValue() == 0) { | ||
|
||
// Get the extract_vector_element operands. | ||
SDValue LEVE = LHS->getOperand(0); | ||
SDValue REVE = RHS->getOperand(1); | ||
|
||
if (LEVE->getOpcode() == ISD::EXTRACT_VECTOR_ELT && | ||
REVE->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { | ||
// Check that different elements from the same vector are | ||
// extracted. | ||
if (LEVE->getOperand(0) == REVE->getOperand(0) && | ||
LEVE->getOperand(1) != REVE->getOperand(1)) { | ||
SDValue IdentitySrc = LEVE.getOperand(0); | ||
return IdentitySrc; | ||
} | ||
} | ||
} | ||
} | ||
|
||
if (VT != MVT::i64 || DCI.isBeforeLegalizeOps()) | ||
return SDValue(); | ||
|
||
|
@@ -13180,13 +13245,42 @@ SDValue SITargetLowering::performXorCombine(SDNode *N, | |
if (SDValue RV = reassociateScalarOps(N, DCI.DAG)) | ||
return RV; | ||
|
||
SelectionDAG &DAG = DCI.DAG; | ||
EVT VT = N->getValueType(0); | ||
SDValue LHS = N->getOperand(0); | ||
SDValue RHS = N->getOperand(1); | ||
|
||
// Fold the fneg of a vselect into the v2 vselect operands. | ||
// xor (vselect c, a, b), 0x80000000 -> | ||
// bitcast (vselect c, (fneg (bitcast a)), (fneg (bitcast b))) | ||
if (VT == MVT::v2i32 && LHS.getNumOperands() > 1) { | ||
chrisjbris marked this conversation as resolved.
Show resolved
Hide resolved
chrisjbris marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
const ConstantSDNode *CRHS0 = dyn_cast<ConstantSDNode>(RHS.getOperand(0)); | ||
const ConstantSDNode *CRHS1 = dyn_cast<ConstantSDNode>(RHS.getOperand(1)); | ||
SDValue LHS_0 = LHS.getOperand(0); | ||
SDValue LHS_1 = LHS.getOperand(1); | ||
|
||
if (LHS.getOpcode() == ISD::VSELECT && CRHS0 && | ||
CRHS0->getAPIntValue().isSignMask() && | ||
shouldFoldFNegIntoSrc(N, LHS_0) && CRHS1 && | ||
CRHS1->getAPIntValue().isSignMask() && | ||
shouldFoldFNegIntoSrc(N, LHS_1)) { | ||
|
||
SDLoc DL(N); | ||
SDValue CastLHS = | ||
DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, LHS->getOperand(1)); | ||
SDValue CastRHS = | ||
DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, LHS->getOperand(2)); | ||
SDValue FNegLHS = DAG.getNode(ISD::FNEG, DL, MVT::v2f32, CastLHS); | ||
SDValue FNegRHS = DAG.getNode(ISD::FNEG, DL, MVT::v2f32, CastRHS); | ||
SDValue NewSelect = DAG.getNode(ISD::VSELECT, DL, MVT::v2f32, | ||
LHS->getOperand(0), FNegLHS, FNegRHS); | ||
return DAG.getNode(ISD::BITCAST, DL, VT, NewSelect); | ||
} | ||
Comment on lines
+13269
to
+13279
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd prefer to handle using fneg/fabs on integer select as a separate patch There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How do you suggest we prevent the test regression without including this code in this diff? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. https://godbolt.org/z/9aocMa381 directly do the work to support source modifiers as integer on select |
||
} | ||
|
||
const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS); | ||
SelectionDAG &DAG = DCI.DAG; | ||
|
||
EVT VT = N->getValueType(0); | ||
if (CRHS && VT == MVT::i64) { | ||
if (SDValue Split = | ||
splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::XOR, LHS, CRHS)) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BUILD_VECTOR doesn't support source modifiers. It wouldn't be specific to this case either, it would be for all types and all opcodes that support source modifiers in the input. But instead of pretending build_vector supports these, there should be combines involving build_vector to expose the modifiers to the real use/def instructions