@@ -1352,6 +1352,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
1352
1352
setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
1353
1353
}
1354
1354
}
1355
+ if (Subtarget->hasFullFP16())
1356
+ setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
1357
+
1355
1358
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1356
1359
MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1357
1360
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
@@ -16080,9 +16083,19 @@ static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT,
16080
16083
SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
16081
16084
SelectionDAG &DAG) const {
16082
16085
SDValue Src = Op.getOperand(0);
16086
+ EVT SrcVT = Src.getValueType();
16087
+
16088
+ // Scalarize v2f16 to turn it into a faddp. This will be more efficient than
16089
+ // widening by inserting zeroes.
16090
+ if (Subtarget->hasFullFP16() && Op.getOpcode() == ISD::VECREDUCE_FADD &&
16091
+ SrcVT == MVT::v2f16) {
16092
+ SDLoc DL(Op);
16093
+ return DAG.getNode(ISD::FADD, DL, MVT::f16,
16094
+ DAG.getExtractVectorElt(DL, MVT::f16, Src, 0),
16095
+ DAG.getExtractVectorElt(DL, MVT::f16, Src, 1));
16096
+ }
16083
16097
16084
16098
// Try to lower fixed length reductions to SVE.
16085
- EVT SrcVT = Src.getValueType();
16086
16099
bool OverrideNEON = !Subtarget->isNeonAvailable() ||
16087
16100
Op.getOpcode() == ISD::VECREDUCE_AND ||
16088
16101
Op.getOpcode() == ISD::VECREDUCE_OR ||
0 commit comments