Skip to content

Commit 6b27035

Browse files
authored
[SelectionDAG] Allow FREEZE to be hoisted before FP SETCC. (#84358)
No nans/infs in SelectionDAG is complicated. Hopefully I've captured all of the cases. I've only applied ConsiderFlags to the SDNodeFlags since those are the only ones that will be dropped by hoisting. The condition code and TargetOptions would still be in effect. Recovers some regression from #84232.
1 parent dc567a2 commit 6b27035

File tree

8 files changed

+991
-991
lines changed

8 files changed

+991
-991
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5091,10 +5091,24 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
50915091
case ISD::BUILD_PAIR:
50925092
return false;
50935093

5094-
case ISD::SETCC:
5094+
case ISD::SETCC: {
50955095
// Integer setcc cannot create undef or poison.
5096-
// FIXME: Support FP.
5097-
return !Op.getOperand(0).getValueType().isInteger();
5096+
if (Op.getOperand(0).getValueType().isInteger())
5097+
return false;
5098+
5099+
// FP compares are more complicated. They can create poison for nan/infinity
5100+
// based on options and flags. The options and flags also cause special
5101+
// nonan condition codes to be used. Those condition codes may be preserved
5102+
// even if the nonan flag is dropped somewhere.
5103+
ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(2))->get();
5104+
if (((unsigned)CCCode & 0x10U))
5105+
return true;
5106+
5107+
const TargetOptions &Options = getTarget().Options;
5108+
return Options.NoNaNsFPMath || Options.NoInfsFPMath ||
5109+
(ConsiderFlags &&
5110+
(Op->getFlags().hasNoNaNs() || Op->getFlags().hasNoInfs()));
5111+
}
50985112

50995113
// Matches hasPoisonGeneratingFlags().
51005114
case ISD::ZERO_EXTEND:

llvm/test/CodeGen/RISCV/bfloat-convert.ll

Lines changed: 27 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -479,9 +479,8 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
479479
; RV32IZFBFMIN-NEXT: feq.s a2, fs0, fs0
480480
; RV32IZFBFMIN-NEXT: neg a2, a2
481481
; RV32IZFBFMIN-NEXT: lui a4, 524288
482-
; RV32IZFBFMIN-NEXT: li a5, 1
483482
; RV32IZFBFMIN-NEXT: lui a3, 524288
484-
; RV32IZFBFMIN-NEXT: bne s2, a5, .LBB10_2
483+
; RV32IZFBFMIN-NEXT: beqz s2, .LBB10_2
485484
; RV32IZFBFMIN-NEXT: # %bb.1: # %start
486485
; RV32IZFBFMIN-NEXT: mv a3, a1
487486
; RV32IZFBFMIN-NEXT: .LBB10_2: # %start
@@ -525,9 +524,8 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
525524
; R32IDZFBFMIN-NEXT: feq.s a2, fs0, fs0
526525
; R32IDZFBFMIN-NEXT: neg a2, a2
527526
; R32IDZFBFMIN-NEXT: lui a4, 524288
528-
; R32IDZFBFMIN-NEXT: li a5, 1
529527
; R32IDZFBFMIN-NEXT: lui a3, 524288
530-
; R32IDZFBFMIN-NEXT: bne s2, a5, .LBB10_2
528+
; R32IDZFBFMIN-NEXT: beqz s2, .LBB10_2
531529
; R32IDZFBFMIN-NEXT: # %bb.1: # %start
532530
; R32IDZFBFMIN-NEXT: mv a3, a1
533531
; R32IDZFBFMIN-NEXT: .LBB10_2: # %start
@@ -548,50 +546,43 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
548546
;
549547
; RV32ID-LABEL: fcvt_l_bf16_sat:
550548
; RV32ID: # %bb.0: # %start
551-
; RV32ID-NEXT: addi sp, sp, -32
552-
; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
553-
; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
554-
; RV32ID-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
555-
; RV32ID-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
556-
; RV32ID-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
549+
; RV32ID-NEXT: addi sp, sp, -16
550+
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
551+
; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
557552
; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
558-
; RV32ID-NEXT: lui a0, %hi(.LCPI10_0)
559-
; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
560553
; RV32ID-NEXT: fmv.x.w a0, fa0
561554
; RV32ID-NEXT: slli a0, a0, 16
562555
; RV32ID-NEXT: fmv.w.x fs0, a0
563-
; RV32ID-NEXT: flt.s s0, fa5, fs0
564-
; RV32ID-NEXT: neg s1, s0
565556
; RV32ID-NEXT: lui a0, 913408
566557
; RV32ID-NEXT: fmv.w.x fa5, a0
567-
; RV32ID-NEXT: fle.s s2, fa5, fs0
568-
; RV32ID-NEXT: neg s3, s2
558+
; RV32ID-NEXT: fle.s s0, fa5, fs0
569559
; RV32ID-NEXT: fmv.s fa0, fs0
570560
; RV32ID-NEXT: call __fixsfdi
571-
; RV32ID-NEXT: and a0, s3, a0
572-
; RV32ID-NEXT: or a0, s1, a0
573-
; RV32ID-NEXT: feq.s a2, fs0, fs0
574-
; RV32ID-NEXT: neg a2, a2
575561
; RV32ID-NEXT: lui a4, 524288
576-
; RV32ID-NEXT: li a5, 1
577-
; RV32ID-NEXT: lui a3, 524288
578-
; RV32ID-NEXT: bne s2, a5, .LBB10_2
562+
; RV32ID-NEXT: lui a2, 524288
563+
; RV32ID-NEXT: beqz s0, .LBB10_2
579564
; RV32ID-NEXT: # %bb.1: # %start
580-
; RV32ID-NEXT: mv a3, a1
565+
; RV32ID-NEXT: mv a2, a1
581566
; RV32ID-NEXT: .LBB10_2: # %start
582-
; RV32ID-NEXT: and a0, a2, a0
583-
; RV32ID-NEXT: beqz s0, .LBB10_4
567+
; RV32ID-NEXT: lui a1, %hi(.LCPI10_0)
568+
; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
569+
; RV32ID-NEXT: flt.s a3, fa5, fs0
570+
; RV32ID-NEXT: beqz a3, .LBB10_4
584571
; RV32ID-NEXT: # %bb.3:
585-
; RV32ID-NEXT: addi a3, a4, -1
572+
; RV32ID-NEXT: addi a2, a4, -1
586573
; RV32ID-NEXT: .LBB10_4: # %start
587-
; RV32ID-NEXT: and a1, a2, a3
588-
; RV32ID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
589-
; RV32ID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
590-
; RV32ID-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
591-
; RV32ID-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
592-
; RV32ID-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
574+
; RV32ID-NEXT: feq.s a1, fs0, fs0
575+
; RV32ID-NEXT: neg a4, a1
576+
; RV32ID-NEXT: and a1, a4, a2
577+
; RV32ID-NEXT: neg a2, a3
578+
; RV32ID-NEXT: neg a3, s0
579+
; RV32ID-NEXT: and a0, a3, a0
580+
; RV32ID-NEXT: or a0, a2, a0
581+
; RV32ID-NEXT: and a0, a4, a0
582+
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
583+
; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
593584
; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
594-
; RV32ID-NEXT: addi sp, sp, 32
585+
; RV32ID-NEXT: addi sp, sp, 16
595586
; RV32ID-NEXT: ret
596587
;
597588
; CHECK64ZFBFMIN-LABEL: fcvt_l_bf16_sat:
@@ -675,8 +666,7 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
675666
; CHECK32ZFBFMIN-NEXT: neg s0, a0
676667
; CHECK32ZFBFMIN-NEXT: fmv.w.x fa5, zero
677668
; CHECK32ZFBFMIN-NEXT: fle.s a0, fa5, fa0
678-
; CHECK32ZFBFMIN-NEXT: xori a0, a0, 1
679-
; CHECK32ZFBFMIN-NEXT: addi s1, a0, -1
669+
; CHECK32ZFBFMIN-NEXT: neg s1, a0
680670
; CHECK32ZFBFMIN-NEXT: call __fixunssfdi
681671
; CHECK32ZFBFMIN-NEXT: and a0, s1, a0
682672
; CHECK32ZFBFMIN-NEXT: or a0, s0, a0
@@ -703,8 +693,7 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
703693
; RV32ID-NEXT: neg s0, a0
704694
; RV32ID-NEXT: fmv.w.x fa5, zero
705695
; RV32ID-NEXT: fle.s a0, fa5, fa0
706-
; RV32ID-NEXT: xori a0, a0, 1
707-
; RV32ID-NEXT: addi s1, a0, -1
696+
; RV32ID-NEXT: neg s1, a0
708697
; RV32ID-NEXT: call __fixunssfdi
709698
; RV32ID-NEXT: and a0, s1, a0
710699
; RV32ID-NEXT: or a0, s0, a0

llvm/test/CodeGen/RISCV/double-convert.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,8 +1034,7 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind {
10341034
; RV32IFD-NEXT: neg s0, a0
10351035
; RV32IFD-NEXT: fcvt.d.w fa5, zero
10361036
; RV32IFD-NEXT: fle.d a0, fa5, fa0
1037-
; RV32IFD-NEXT: xori a0, a0, 1
1038-
; RV32IFD-NEXT: addi s1, a0, -1
1037+
; RV32IFD-NEXT: neg s1, a0
10391038
; RV32IFD-NEXT: call __fixunsdfdi
10401039
; RV32IFD-NEXT: and a0, s1, a0
10411040
; RV32IFD-NEXT: or a0, s0, a0
@@ -1062,27 +1061,28 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind {
10621061
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
10631062
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
10641063
; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
1064+
; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
10651065
; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
10661066
; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
10671067
; RV32IZFINXZDINX-NEXT: lw s0, 8(sp)
10681068
; RV32IZFINXZDINX-NEXT: lw s1, 12(sp)
1069-
; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
10701069
; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero
10711070
; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0
1072-
; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI14_0)
1073-
; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI14_0)(a3)
1074-
; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI14_0+4)(a3)
1075-
; RV32IZFINXZDINX-NEXT: xori a2, a2, 1
1076-
; RV32IZFINXZDINX-NEXT: addi a2, a2, -1
1077-
; RV32IZFINXZDINX-NEXT: and a0, a2, a0
1078-
; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0
1079-
; RV32IZFINXZDINX-NEXT: neg a3, a3
1080-
; RV32IZFINXZDINX-NEXT: or a0, a3, a0
1081-
; RV32IZFINXZDINX-NEXT: and a1, a2, a1
1082-
; RV32IZFINXZDINX-NEXT: or a1, a3, a1
1071+
; RV32IZFINXZDINX-NEXT: neg s2, a2
1072+
; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
1073+
; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI14_0)
1074+
; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI14_0+4)(a2)
1075+
; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI14_0)(a2)
1076+
; RV32IZFINXZDINX-NEXT: and a0, s2, a0
1077+
; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0
1078+
; RV32IZFINXZDINX-NEXT: neg a2, a2
1079+
; RV32IZFINXZDINX-NEXT: or a0, a2, a0
1080+
; RV32IZFINXZDINX-NEXT: and a1, s2, a1
1081+
; RV32IZFINXZDINX-NEXT: or a1, a2, a1
10831082
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
10841083
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
10851084
; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
1085+
; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
10861086
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
10871087
; RV32IZFINXZDINX-NEXT: ret
10881088
;

0 commit comments

Comments
 (0)