Skip to content

[llvm][GISel] Use computeKnownFPClass #141484

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions llvm/include/llvm/CodeGen/GlobalISel/Utils.h
Original file line number Diff line number Diff line change
@@ -346,11 +346,12 @@ isKnownToBeAPowerOfTwo(Register Val, const MachineRegisterInfo &MRI,
/// Returns true if \p Val can be assumed to never be a NaN. If \p SNaN is true,
/// this returns whether \p Val can be assumed to never be a signaling NaN.
LLVM_ABI bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
bool SNaN = false);
GISelValueTracking *ValueTracking, bool SNaN = false);

/// Returns true if \p Val can be assumed to never be a signaling NaN.
inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) {
return isKnownNeverNaN(Val, MRI, true);
inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI,
GISelValueTracking *ValueTracking) {
return isKnownNeverNaN(Val, MRI, ValueTracking, true);
}

LLVM_ABI Align inferAlignFromPtrInfo(MachineFunction &MF,
4 changes: 2 additions & 2 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
@@ -6519,8 +6519,8 @@ unsigned CombinerHelper::getFPMinMaxOpcForSelect(
CombinerHelper::SelectPatternNaNBehaviour
CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
bool IsOrderedComparison) const {
bool LHSSafe = isKnownNeverNaN(LHS, MRI);
bool RHSSafe = isKnownNeverNaN(RHS, MRI);
bool LHSSafe = isKnownNeverNaN(LHS, MRI, VT);
bool RHSSafe = isKnownNeverNaN(RHS, MRI, VT);
// Completely unsafe.
if (!LHSSafe && !RHSSafe)
return SelectPatternNaNBehaviour::NOT_APPLICABLE;
145 changes: 141 additions & 4 deletions llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
Original file line number Diff line number Diff line change
@@ -937,8 +937,6 @@ void GISelValueTracking::computeKnownFPClass(Register R,

if (KnownSrc.isKnownNeverPosInfinity())
Known.knownNot(fcPosInf);
if (KnownSrc.isKnownNever(fcSNan))
Known.knownNot(fcSNan);

// Any negative value besides -0 returns a nan.
if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
@@ -961,25 +959,27 @@ void GISelValueTracking::computeKnownFPClass(Register R,
}
case TargetOpcode::G_FSIN:
case TargetOpcode::G_FCOS:
case TargetOpcode::G_FTAN:
case TargetOpcode::G_FSINCOS: {
// Return NaN on infinite inputs.
Register Val = MI.getOperand(1).getReg();
KnownFPClass KnownSrc;

computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
Depth + 1);

Known.knownNot(fcInf);

if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
Known.knownNot(fcNan);
break;
}
case TargetOpcode::G_FMAXNUM_IEEE:
case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FMINNUM:
case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXIMUM:
case TargetOpcode::G_FMINIMUM:
case TargetOpcode::G_FMAXNUM_IEEE:
case TargetOpcode::G_FMAXIMUMNUM:
case TargetOpcode::G_FMINIMUMNUM: {
Register LHS = MI.getOperand(1).getReg();
@@ -994,13 +994,23 @@ void GISelValueTracking::computeKnownFPClass(Register R,
bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
Known = KnownLHS | KnownRHS;

if (Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
Opcode == TargetOpcode::G_FMINNUM_IEEE)
Known.knownNot(fcSNan);

// If either operand is not NaN, the result is not NaN.
if (NeverNaN && (Opcode == TargetOpcode::G_FMINNUM ||
Opcode == TargetOpcode::G_FMAXNUM ||
Opcode == TargetOpcode::G_FMINIMUMNUM ||
Opcode == TargetOpcode::G_FMAXIMUMNUM))
Known.knownNot(fcNan);

if ((Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
Opcode == TargetOpcode::G_FMINNUM_IEEE) &&
((KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNever(fcSNan)) ||
(KnownLHS.isKnownNever(fcSNan) && KnownRHS.isKnownNeverNaN())))
Known.knownNot(fcNan);

if (Opcode == TargetOpcode::G_FMAXNUM ||
Opcode == TargetOpcode::G_FMAXIMUMNUM ||
Opcode == TargetOpcode::G_FMAXNUM_IEEE) {
@@ -1089,6 +1099,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
case TargetOpcode::G_FCANONICALIZE: {
Register Val = MI.getOperand(1).getReg();
KnownFPClass KnownSrc;

computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
Depth + 1);

@@ -1190,6 +1201,8 @@ void GISelValueTracking::computeKnownFPClass(Register R,
if (KnownSrc.isKnownNeverNaN()) {
Known.knownNot(fcNan);
Known.signBitMustBeZero();
} else {
Known.knownNot(fcSNan);
}

break;
@@ -1300,6 +1313,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
KnownFPClass KnownLHS, KnownRHS;

bool WantNegative =
(Opcode == TargetOpcode::G_FADD ||
Opcode == TargetOpcode::G_STRICT_FADD) &&
@@ -1364,6 +1378,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
case TargetOpcode::G_STRICT_FMUL: {
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();

// X * X is always non-negative or a NaN.
if (LHS == RHS)
Known.knownNot(fcNegative);
@@ -1494,6 +1509,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
Register Src = MI.getOperand(1).getReg();
// Infinity, nan and zero propagate from source.
computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth + 1);
Known.knownNot(fcSNan);

LLT DstTy = MRI.getType(Dst).getScalarType();
const fltSemantics &DstSem = getFltSemanticForLLT(DstTy);
@@ -1517,6 +1533,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
case TargetOpcode::G_FPTRUNC: {
computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
Depth);
Known.knownNot(fcSNan);
break;
}
case TargetOpcode::G_SITOFP:
@@ -1698,6 +1715,126 @@ void GISelValueTracking::computeKnownFPClass(Register R,
computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1);
break;
}
case TargetOpcode::G_FATAN: {
  // Operand 1 is the value whose arctangent is taken.
  Register Val = MI.getOperand(1).getReg();
  KnownFPClass KnownSrc;

  computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                      Depth + 1);

  // atan(+-inf) -> +-pi/2. Unlike sin/cos/tan, an infinite input does not
  // produce a NaN here; the result is a finite value.
  if (KnownSrc.isKnownAlways(fcInf))
    Known.KnownFPClasses = fcFinite;

  break;
}
case TargetOpcode::G_FATAN2: {
  // atan2(LHS, RHS): operand 1 is the "y" argument, operand 2 the "x"
  // argument.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  KnownFPClass KnownLHS;
  KnownFPClass KnownRHS;

  computeKnownFPClass(LHS, DemandedElts, InterestedClasses, KnownLHS,
                      Depth + 1);

  computeKnownFPClass(RHS, DemandedElts, InterestedClasses, KnownRHS,
                      Depth + 1);

  // Nothing can be concluded below unless both operands are known not to be
  // NaN.
  if (!KnownLHS.isKnownNeverNaN() || !KnownRHS.isKnownNeverNaN())
    break;

  if (KnownLHS.isKnownAlways(fcZero)) {
    // atan2(+-0, -0) -> +-pi
    // atan2(+-0, x) -> +-pi for x < 0
    if (KnownRHS.isKnownAlways(fcNegFinite)) {
      Known.KnownFPClasses = fcFinite;
      break;
    }

    // atan2(+-0, +0) -> +-0
    // atan2(+-0, x) -> +-0 for x > 0
    if (KnownRHS.isKnownAlways(fcPosFinite)) {
      Known.KnownFPClasses = fcZero;
      break;
    }
  }

  if (KnownRHS.isKnownAlways(fcZero)) {
    // atan2(y, +-0) -> -pi/2 for y < 0
    if (KnownLHS.isKnownNeverZero() && KnownLHS.isKnownAlways(fcNegFinite)) {
      Known.KnownFPClasses = fcNegFinite;
      break;
    }

    // atan2(y, +-0) -> +pi/2 for y > 0
    if (KnownLHS.isKnownNeverZero() && KnownLHS.isKnownAlways(fcPosFinite)) {
      Known.KnownFPClasses = fcPosFinite;
      break;
    }
  }

  // Only a strictly positive finite y is handled for infinite x.
  if (KnownLHS.isKnownAlways(fcPosFinite) && KnownLHS.isKnownNeverZero()) {
    // atan2(y, -inf) -> +pi for finite y > 0
    if (KnownRHS.isKnownAlways(fcNegInf)) {
      Known.KnownFPClasses = fcFinite;
      break;
    }

    // atan2(y, +inf) -> +0 for finite y > 0
    if (KnownRHS.isKnownAlways(fcPosInf)) {
      Known.KnownFPClasses = fcZero;
      break;
    }
  }

  if (KnownLHS.isKnownAlways(fcInf)) {
    // atan2(+-inf, x) -> +-pi/2 for finite x
    // atan2(+-inf, -inf) -> +-3pi/4
    // atan2(+-inf, +inf) -> +-pi/4
    Known.KnownFPClasses = fcFinite;
    break;
  }

  break;
}
case TargetOpcode::G_FCOSH: {
  // Classify the input (operand 1); the result is only refined when the
  // input is known to always be an infinity.
  KnownFPClass SrcClass;
  computeKnownFPClass(MI.getOperand(1).getReg(), DemandedElts,
                      InterestedClasses, SrcClass, Depth + 1);

  if (!SrcClass.isKnownAlways(fcInf))
    break;

  // cosh(+-inf) -> +inf
  Known.KnownFPClasses = fcPosInf;
  break;
}
case TargetOpcode::G_FSINH: {
  // Classify the input (operand 1); the result is only refined when the
  // input is known to always be an infinity.
  KnownFPClass SrcClass;
  computeKnownFPClass(MI.getOperand(1).getReg(), DemandedElts,
                      InterestedClasses, SrcClass, Depth + 1);

  if (!SrcClass.isKnownAlways(fcInf))
    break;

  // sinh(+-inf) -> +-inf
  Known.KnownFPClasses = fcInf;
  break;
}
case TargetOpcode::G_FTANH: {
  // Classify the input (operand 1); the result is only refined when the
  // input is known to always be an infinity.
  KnownFPClass SrcClass;
  computeKnownFPClass(MI.getOperand(1).getReg(), DemandedElts,
                      InterestedClasses, SrcClass, Depth + 1);

  if (!SrcClass.isKnownAlways(fcInf))
    break;

  // tanh(+-inf) -> +-1, which is finite.
  Known.KnownFPClasses = fcFinite;
  break;
}
}
}

4 changes: 2 additions & 2 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
@@ -8179,10 +8179,10 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
// Note this must be done here, and not as an optimization combine in the
// absence of a dedicated quiet-snan instruction as we're using an
// omni-purpose G_FCANONICALIZE.
if (!isKnownNeverSNaN(Src0, MRI))
if (!isKnownNeverSNaN(Src0, MRI, VT))
Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);

if (!isKnownNeverSNaN(Src1, MRI))
if (!isKnownNeverSNaN(Src1, MRI, VT))
Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
}

78 changes: 6 additions & 72 deletions llvm/lib/CodeGen/GlobalISel/Utils.cpp
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
@@ -807,7 +808,7 @@ llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
}

bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
bool SNaN) {
GISelValueTracking *VT, bool SNaN) {
const MachineInstr *DefMI = MRI.getVRegDef(Val);
if (!DefMI)
return false;
@@ -816,78 +817,11 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
return true;

// If the value is a constant, we can obviously see if it is a NaN or not.
if (const ConstantFP *FPVal = getConstantFPVRegVal(Val, MRI)) {
return !FPVal->getValueAPF().isNaN() ||
(SNaN && !FPVal->getValueAPF().isSignaling());
}

if (DefMI->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
for (const auto &Op : DefMI->uses())
if (!isKnownNeverNaN(Op.getReg(), MRI, SNaN))
return false;
return true;
}
KnownFPClass FPClass = VT->computeKnownFPClass(Val, fcNan);
if (SNaN)
return FPClass.isKnownNever(fcSNan);

switch (DefMI->getOpcode()) {
default:
break;
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
case TargetOpcode::G_FREM:
case TargetOpcode::G_FSIN:
case TargetOpcode::G_FCOS:
case TargetOpcode::G_FTAN:
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
case TargetOpcode::G_FMA:
case TargetOpcode::G_FMAD:
if (SNaN)
return true;

// TODO: Need isKnownNeverInfinity
return false;
case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXNUM_IEEE: {
if (SNaN)
return true;
// This can return a NaN if either operand is an sNaN, or if both operands
// are NaN.
return (isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI) &&
isKnownNeverSNaN(DefMI->getOperand(2).getReg(), MRI)) ||
(isKnownNeverSNaN(DefMI->getOperand(1).getReg(), MRI) &&
isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI));
}
case TargetOpcode::G_FMINNUM:
case TargetOpcode::G_FMAXNUM: {
// Only one needs to be known not-nan, since it will be returned if the
// other ends up being one.
return isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI, SNaN) ||
isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI, SNaN);
}
}

if (SNaN) {
// FP operations quiet. For now, just handle the ones inserted during
// legalization.
switch (DefMI->getOpcode()) {
case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC:
case TargetOpcode::G_FCANONICALIZE:
return true;
default:
return false;
}
}

return false;
return FPClass.isKnownNeverNaN();
}

Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
Original file line number Diff line number Diff line change
@@ -859,7 +859,7 @@ class NeverNaNPats<dag ops, list<dag> frags> : PatFrags<ops, frags> {
return CurDAG->isKnownNeverNaN(SDValue(N,0));
}];
let GISelPredicateCode = [{
return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI);
return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT);
}];
}

12 changes: 7 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
Original file line number Diff line number Diff line change
@@ -261,7 +261,7 @@ bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
// nodes(max/min) have same behavior when one input is NaN and other isn't.
// Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN,
// also post-legalizer inputs to min/max are fcanonicalized (never SNaN).
if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) {
if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI, VT)) {
// Don't fold single use constant that can't be inlined.
if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
(!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
@@ -291,8 +291,8 @@ bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI,
// For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
// to 0.0 requires dx10_clamp = true.
if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
isKnownNeverSNaN(Val, MRI)) ||
isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
isKnownNeverSNaN(Val, MRI, VT)) ||
isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT)) {
Reg = Val;
return true;
}
@@ -329,6 +329,8 @@ bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
Register Val = Src0->getOperand(0).getReg();

auto isOp3Zero = [&]() {
if (MI.getNumOperands() < 5)
return false;
MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0);
@@ -338,9 +340,9 @@ bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
// no NaN inputs. Most often MI is marked with nnan fast math flag.
// For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
// when Val could be QNaN. If Val can also be SNaN third input should be 0.0.
if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) ||
if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT) ||
(getIEEE() && getDX10Clamp() &&
(isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) {
(isKnownNeverSNaN(Val, MRI, VT) || isOp3Zero()))) {
Reg = Val;
return true;
}
18 changes: 13 additions & 5 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
Original file line number Diff line number Diff line change
@@ -74,7 +74,8 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
; GFX10-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_med3_f32 v0, v0, 1.0, 0
Comment on lines +77 to +78
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regression

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All regressions stem from the difference in handling SNaNs. Not sure how to proceed.

; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
@@ -84,7 +85,9 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_med3_num_f32 v0, v0, 1.0, 0
; GFX12-NEXT: s_setpc_b64 s[30:31]
Comment on lines +88 to 91
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regression

%fmul = fmul float %a, 2.0
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
@@ -134,7 +137,9 @@ define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_med3_num_f32 v0, v0, 1.0, 0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
@@ -172,7 +177,8 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
; GFX10-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
@@ -182,7 +188,9 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_med3_num_f32 v0, v0, 0, 1.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
32 changes: 13 additions & 19 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
Original file line number Diff line number Diff line change
@@ -95,7 +95,9 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
; GFX10-LABEL: test_min_max_splat_padded_with_undef:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_max_f16 v0, v0, 0
; GFX10-NEXT: v_pk_min_f16 v0, v0, 1.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_splat_padded_with_undef:
@@ -105,7 +107,10 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX12-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, 0
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, 1.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
%maxnum = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 0.0, half poison>, <2 x half> %fmul)
@@ -305,9 +310,7 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -330,8 +333,7 @@ define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) #
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
@@ -355,9 +357,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
@@ -367,9 +367,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
%minnum = call float @llvm.minnum.f32(float %fmul, float 1.0)
@@ -381,9 +379,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
@@ -393,9 +389,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0
; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
%minnum = call float @llvm.minnum.f32(float %fmul, float 1.0)
22 changes: 8 additions & 14 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
Original file line number Diff line number Diff line change
@@ -456,15 +456,13 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -489,15 +487,13 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
@@ -509,7 +505,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0
; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%minnum = call float @llvm.minnum.f32(float %a, float 4.0)
%fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
@@ -522,16 +518,14 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
@@ -543,7 +537,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0
; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%minnum = call float @llvm.minnum.f32(float %a, float 4.0)
%fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
Original file line number Diff line number Diff line change
@@ -162,8 +162,12 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
;
; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true
; GFX12: liveins: $vgpr0
@@ -172,8 +176,12 @@ body: |
; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%8:vgpr(s32) = COPY %2(s32)
@@ -222,8 +230,12 @@ body: |
; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%8:vgpr(s32) = COPY %2(s32)
@@ -307,8 +319,12 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
;
; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true
; GFX12: liveins: $vgpr0
@@ -317,8 +333,12 @@ body: |
; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%8:vgpr(s32) = COPY %2(s32)
Original file line number Diff line number Diff line change
@@ -441,13 +441,8 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMUL]], [[COPY2]]
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY3]]
; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32)
; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%9:vgpr(s32) = COPY %2(s32)
@@ -481,13 +476,9 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[FMUL]]
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FCANONICALIZE]], [[COPY2]], [[COPY3]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FCANONICALIZE]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%10:vgpr(s32) = COPY %2(s32)
@@ -522,14 +513,9 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[FMUL]]
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY2]]
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY3]]
; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32)
; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FCANONICALIZE]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%10:vgpr(s32) = COPY %2(s32)
@@ -564,13 +550,8 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMUL]], [[COPY2]]
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY3]]
; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32)
; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%9:vgpr(s32) = COPY %2(s32)
Original file line number Diff line number Diff line change
@@ -469,11 +469,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[COPY]], [[COPY1]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY2]]
; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32)
; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[COPY]], [[COPY1]], [[COPY2]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%7:vgpr(s32) = COPY %2(s32)
@@ -502,11 +501,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[COPY]], [[COPY1]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY2]]
; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32)
; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[COPY]], [[COPY2]], [[COPY1]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 4.000000e+00
%7:vgpr(s32) = COPY %2(s32)
@@ -536,11 +534,10 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]]
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY2]]
; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32)
; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FCANONICALIZE]], [[COPY2]], [[COPY1]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 4.000000e+00
%7:vgpr(s32) = G_FCANONICALIZE %0