[AMDGPU] Fold fmed3 when inputs include infinity (#144824)

fairywreath · web-flow · commit 9f3931b659f6 · 2025-06-24T21:44:17.000+09:00
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1031,6 +1031,14 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     // s1 _nan: min(s0, s2)
     // s2 _nan: min(s0, s1)
 
+    // med3 behavior with infinity
+    // s0 +inf: max(s1, s2)
+    // s1 +inf: max(s0, s2)
+    // s2 +inf: max(s0, s1)
+    // s0 -inf: min(s1, s2)
+    // s1 -inf: min(s0, s2)
+    // s2 -inf: min(s0, s1)
+
     // Checking for NaN before canonicalization provides better fidelity when
     // mapping other operations onto fmed3 since the order of operands is
     // unchanged.
@@ -1039,51 +1047,64 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     const APFloat *ConstSrc1 = nullptr;
     const APFloat *ConstSrc2 = nullptr;
 
-    // TODO: Also can fold to 2 operands with infinities.
-    if ((match(Src0, m_APFloat(ConstSrc0)) && ConstSrc0->isNaN()) ||
+    if ((match(Src0, m_APFloat(ConstSrc0)) &&
+         (ConstSrc0->isNaN() || ConstSrc0->isInfinity())) ||
         isa<UndefValue>(Src0)) {
+      const bool IsPosInfinity = ConstSrc0 && ConstSrc0->isPosInfinity();
       switch (fpenvIEEEMode(II)) {
       case KnownIEEEMode::On:
         // TODO: If Src2 is snan, does it need quieting?
-        if (ConstSrc0 && ConstSrc0->isSignaling())
+        if (ConstSrc0 && ConstSrc0->isNaN() && ConstSrc0->isSignaling())
           return IC.replaceInstUsesWith(II, Src2);
-        V = IC.Builder.CreateMinNum(Src1, Src2);
+
+        V = IsPosInfinity ? IC.Builder.CreateMaxNum(Src1, Src2)
+                          : IC.Builder.CreateMinNum(Src1, Src2);
         break;
       case KnownIEEEMode::Off:
-        V = IC.Builder.CreateMinimumNum(Src1, Src2);
+        V = IsPosInfinity ? IC.Builder.CreateMaximumNum(Src1, Src2)
+                          : IC.Builder.CreateMinimumNum(Src1, Src2);
         break;
       case KnownIEEEMode::Unknown:
         break;
       }
-    } else if ((match(Src1, m_APFloat(ConstSrc1)) && ConstSrc1->isNaN()) ||
+    } else if ((match(Src1, m_APFloat(ConstSrc1)) &&
+                (ConstSrc1->isNaN() || ConstSrc1->isInfinity())) ||
                isa<UndefValue>(Src1)) {
+      const bool IsPosInfinity = ConstSrc1 && ConstSrc1->isPosInfinity();
       switch (fpenvIEEEMode(II)) {
       case KnownIEEEMode::On:
         // TODO: If Src2 is snan, does it need quieting?
-        if (ConstSrc1 && ConstSrc1->isSignaling())
+        if (ConstSrc1 && ConstSrc1->isNaN() && ConstSrc1->isSignaling())
           return IC.replaceInstUsesWith(II, Src2);
 
-        V = IC.Builder.CreateMinNum(Src0, Src2);
+        V = IsPosInfinity ? IC.Builder.CreateMaxNum(Src0, Src2)
+                          : IC.Builder.CreateMinNum(Src0, Src2);
         break;
       case KnownIEEEMode::Off:
-        V = IC.Builder.CreateMinimumNum(Src0, Src2);
+        V = IsPosInfinity ? IC.Builder.CreateMaximumNum(Src0, Src2)
+                          : IC.Builder.CreateMinimumNum(Src0, Src2);
         break;
       case KnownIEEEMode::Unknown:
         break;
       }
-    } else if ((match(Src2, m_APFloat(ConstSrc2)) && ConstSrc2->isNaN()) ||
+    } else if ((match(Src2, m_APFloat(ConstSrc2)) &&
+                (ConstSrc2->isNaN() || ConstSrc2->isInfinity())) ||
                isa<UndefValue>(Src2)) {
       switch (fpenvIEEEMode(II)) {
       case KnownIEEEMode::On:
-        if (ConstSrc2 && ConstSrc2->isSignaling()) {
+        if (ConstSrc2 && ConstSrc2->isNaN() && ConstSrc2->isSignaling()) {
           auto *Quieted = ConstantFP::get(II.getType(), ConstSrc2->makeQuiet());
           return IC.replaceInstUsesWith(II, Quieted);
         }
 
-        V = IC.Builder.CreateMinNum(Src0, Src1);
+        V = (ConstSrc2 && ConstSrc2->isPosInfinity())
+                ? IC.Builder.CreateMaxNum(Src0, Src1)
+                : IC.Builder.CreateMinNum(Src0, Src1);
         break;
       case KnownIEEEMode::Off:
-        V = IC.Builder.CreateMaximumNum(Src0, Src1);
+        V = (ConstSrc2 && ConstSrc2->isNegInfinity())
+                ? IC.Builder.CreateMinimumNum(Src0, Src1)
+                : IC.Builder.CreateMaximumNum(Src0, Src1);
         break;
       case KnownIEEEMode::Unknown:
         break;
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
@@ -521,6 +521,96 @@ define float @fmed3_neg2_3_snan1_f32(float %x, float %y) #1 {
   ret float %med3
 }
 
+define float @fmed3_inf_x_y_f32(float %x, float %y) #1 {
+; IEEE1-LABEL: define float @fmed3_inf_x_y_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; IEEE1-NEXT:    ret float [[MED3]]
+;
+; IEEE0-LABEL: define float @fmed3_inf_x_y_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT:    [[MED3:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF0000000000000, float %x, float %y)
+  ret float %med3
+}
+
+define float @fmed3_x_inf_y_f32(float %x, float %y) #1 {
+; IEEE1-LABEL: define float @fmed3_x_inf_y_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; IEEE1-NEXT:    ret float [[MED3]]
+;
+; IEEE0-LABEL: define float @fmed3_x_inf_y_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT:    [[MED3:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF0000000000000, float %y)
+  ret float %med3
+}
+
+define float @fmed3_x_y_inf_f32(float %x, float %y) #1 {
+; IEEE1-LABEL: define float @fmed3_x_y_inf_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; IEEE1-NEXT:    ret float [[MED3]]
+;
+; IEEE0-LABEL: define float @fmed3_x_y_inf_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT:    [[MED3:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF0000000000000)
+  ret float %med3
+}
+
+define float @fmed3_ninf_x_y_f32(float %x, float %y) #1 {
+; IEEE1-LABEL: define float @fmed3_ninf_x_y_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; IEEE1-NEXT:    ret float [[MED3]]
+;
+; IEEE0-LABEL: define float @fmed3_ninf_x_y_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT:    [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0xFFF0000000000000, float %x, float %y)
+  ret float %med3
+}
+
+define float @fmed3_x_ninf_y_f32(float %x, float %y) #1 {
+; IEEE1-LABEL: define float @fmed3_x_ninf_y_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; IEEE1-NEXT:    ret float [[MED3]]
+;
+; IEEE0-LABEL: define float @fmed3_x_ninf_y_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT:    [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0xFFF0000000000000, float %y)
+  ret float %med3
+}
+
+define float @fmed3_x_y_ninf_f32(float %x, float %y) #1 {
+; IEEE1-LABEL: define float @fmed3_x_y_ninf_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; IEEE1-NEXT:    ret float [[MED3]]
+;
+; IEEE0-LABEL: define float @fmed3_x_y_ninf_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT:    [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT:    ret float [[MED3]]
+;
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0xFFF0000000000000)
+  ret float %med3
+}
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.fmed3 with default mode implied by shader CC
 ; --------------------------------------------------------------------