Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions llvm/include/llvm/IR/NVVMIntrinsicUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,70 @@ inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
"Checking invalid f2i/d2i intrinsic for signed int conversion");
}

/// Classifies an NVVM floating-point-to-integer conversion intrinsic by
/// whether its NaN result is zero.
///
/// \param IntrinsicID one of the nvvm f2i/f2ui/d2i/d2ui/f2ll/f2ull/d2ll/d2ull
///        conversion intrinsics (any rounding mode, with or without _ftz).
/// \returns true for the f2i and f2ui families (float source, 32-bit integer
///          result); false for every d2* family and for f2ll/f2ull, i.e. the
///          conversions where the source or the destination is 64 bits wide.
///
/// Passing any other intrinsic ID is a programming error and reaches
/// llvm_unreachable.
inline bool FPToIntegerIntrinsicNaNZero(Intrinsic::ID IntrinsicID) {
  switch (IntrinsicID) {
  // Double source, 32-bit signed result.
  case Intrinsic::nvvm_d2i_rm:
  case Intrinsic::nvvm_d2i_rn:
  case Intrinsic::nvvm_d2i_rp:
  case Intrinsic::nvvm_d2i_rz:
  // Double source, 32-bit unsigned result.
  case Intrinsic::nvvm_d2ui_rm:
  case Intrinsic::nvvm_d2ui_rn:
  case Intrinsic::nvvm_d2ui_rp:
  case Intrinsic::nvvm_d2ui_rz:
  // Double source, 64-bit signed result.
  case Intrinsic::nvvm_d2ll_rm:
  case Intrinsic::nvvm_d2ll_rn:
  case Intrinsic::nvvm_d2ll_rp:
  case Intrinsic::nvvm_d2ll_rz:
  // Double source, 64-bit unsigned result.
  case Intrinsic::nvvm_d2ull_rm:
  case Intrinsic::nvvm_d2ull_rn:
  case Intrinsic::nvvm_d2ull_rp:
  case Intrinsic::nvvm_d2ull_rz:
  // Float source, 64-bit signed result.
  case Intrinsic::nvvm_f2ll_rm:
  case Intrinsic::nvvm_f2ll_rn:
  case Intrinsic::nvvm_f2ll_rp:
  case Intrinsic::nvvm_f2ll_rz:
  case Intrinsic::nvvm_f2ll_rm_ftz:
  case Intrinsic::nvvm_f2ll_rn_ftz:
  case Intrinsic::nvvm_f2ll_rp_ftz:
  case Intrinsic::nvvm_f2ll_rz_ftz:
  // Float source, 64-bit unsigned result.
  case Intrinsic::nvvm_f2ull_rm:
  case Intrinsic::nvvm_f2ull_rn:
  case Intrinsic::nvvm_f2ull_rp:
  case Intrinsic::nvvm_f2ull_rz:
  case Intrinsic::nvvm_f2ull_rm_ftz:
  case Intrinsic::nvvm_f2ull_rn_ftz:
  case Intrinsic::nvvm_f2ull_rp_ftz:
  case Intrinsic::nvvm_f2ull_rz_ftz:
    return false;

  // Float source, 32-bit signed result.
  case Intrinsic::nvvm_f2i_rm:
  case Intrinsic::nvvm_f2i_rn:
  case Intrinsic::nvvm_f2i_rp:
  case Intrinsic::nvvm_f2i_rz:
  case Intrinsic::nvvm_f2i_rm_ftz:
  case Intrinsic::nvvm_f2i_rn_ftz:
  case Intrinsic::nvvm_f2i_rp_ftz:
  case Intrinsic::nvvm_f2i_rz_ftz:
  // Float source, 32-bit unsigned result.
  case Intrinsic::nvvm_f2ui_rm:
  case Intrinsic::nvvm_f2ui_rn:
  case Intrinsic::nvvm_f2ui_rp:
  case Intrinsic::nvvm_f2ui_rz:
  case Intrinsic::nvvm_f2ui_rm_ftz:
  case Intrinsic::nvvm_f2ui_rn_ftz:
  case Intrinsic::nvvm_f2ui_rp_ftz:
  case Intrinsic::nvvm_f2ui_rz_ftz:
    return true;
  }
  llvm_unreachable("Checking NaN result for invalid f2i/d2i intrinsic");
}

inline APFloat::roundingMode
GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) {
switch (IntrinsicID) {
Expand Down
23 changes: 15 additions & 8 deletions llvm/lib/Analysis/ConstantFolding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2625,8 +2625,17 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
case Intrinsic::nvvm_d2ull_rp:
case Intrinsic::nvvm_d2ull_rz: {
// In float-to-integer conversion, NaN inputs are converted to 0.
if (U.isNaN())
return ConstantInt::get(Ty, 0);
if (U.isNaN()) {
// In float-to-integer conversion, NaN inputs are converted to 0
// when the source and destination bitwidths are both less than 64.
if (nvvm::FPToIntegerIntrinsicNaNZero(IntrinsicID))
return ConstantInt::get(Ty, 0);

// Otherwise, the most significant bit is set.
unsigned BitWidth = Ty->getIntegerBitWidth();
uint64_t Val = 1ULL << (BitWidth - 1);
return ConstantInt::get(Ty, APInt(BitWidth, Val, /*IsSigned=*/false));
}

APFloat::roundingMode RMode =
nvvm::GetFPToIntegerRoundingMode(IntrinsicID);
Expand All @@ -2636,13 +2645,11 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U;

// Return max/min value for integers if the result is +/-inf or
// is too large to fit in the result's integer bitwidth.
bool IsExact = false;
APFloat::opStatus Status =
FloatToRound.convertToInteger(ResInt, RMode, &IsExact);

if (Status != APFloat::opInvalidOp)
return ConstantInt::get(Ty, ResInt);
return nullptr;
FloatToRound.convertToInteger(ResInt, RMode, &IsExact);
return ConstantInt::get(Ty, ResInt);
}
}

Expand Down
21 changes: 6 additions & 15 deletions llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,21 +281,12 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
return {Intrinsic::trunc, FTZ_MustBeOn};

// NVVM intrinsics that map to LLVM cast operations.
//
// Note that llvm's target-generic conversion operators correspond to the rz
// (round to zero) versions of the nvvm conversion intrinsics, even though
// most everything else here uses the rn (round to nearest even) nvvm ops.
case Intrinsic::nvvm_d2i_rz:
case Intrinsic::nvvm_f2i_rz:
case Intrinsic::nvvm_d2ll_rz:
case Intrinsic::nvvm_f2ll_rz:
return {Instruction::FPToSI};
case Intrinsic::nvvm_d2ui_rz:
case Intrinsic::nvvm_f2ui_rz:
case Intrinsic::nvvm_d2ull_rz:
case Intrinsic::nvvm_f2ull_rz:
return {Instruction::FPToUI};
// Integer to floating-point uses RN rounding, not RZ
// Note - we cannot map intrinsics like nvvm_d2ll_rz to LLVM's
// FPToSI, as NaN to int conversion with FPToSI is considered UB and is
// eliminated. NVVM conversion intrinsics are translated to PTX cvt
// instructions, which define the outcome for NaN rather than leaving it as
// UB. Therefore, translate NVVM intrinsics to sitofp/uitofp, but not to
// fptosi/fptoui.
case Intrinsic::nvvm_i2d_rn:
case Intrinsic::nvvm_i2f_rn:
case Intrinsic::nvvm_ll2d_rn:
Expand Down
29 changes: 20 additions & 9 deletions llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll
Original file line number Diff line number Diff line change
Expand Up @@ -185,52 +185,63 @@ define float @trunc_float_ftz(float %a) #0 {
}

; Check NVVM intrinsics that correspond to LLVM cast operations.
; fp -> integer casts should not be converted, as the semantics
; for NaN/Inf/Overflow inputs are different.
; Only integer -> fp casts should be converted.

; CHECK-LABEL: @test_d2i
define i32 @test_d2i(double %a) #0 {
; CHECK: fptosi double %a to i32
; CHECK: call i32 @llvm.nvvm.d2i.rz(double %a)
; CHECK-NOT: fptosi double %a to i32
%ret = call i32 @llvm.nvvm.d2i.rz(double %a)
ret i32 %ret
}
; CHECK-LABEL: @test_f2i
define i32 @test_f2i(float %a) #0 {
; CHECK: fptosi float %a to i32
; CHECK: call i32 @llvm.nvvm.f2i.rz(float %a)
; CHECK-NOT: fptosi float %a to i32
%ret = call i32 @llvm.nvvm.f2i.rz(float %a)
ret i32 %ret
}
; CHECK-LABEL: @test_d2ll
define i64 @test_d2ll(double %a) #0 {
; CHECK: fptosi double %a to i64
; CHECK: call i64 @llvm.nvvm.d2ll.rz(double %a)
; CHECK-NOT: fptosi double %a to i64
%ret = call i64 @llvm.nvvm.d2ll.rz(double %a)
ret i64 %ret
}
; CHECK-LABEL: @test_f2ll
define i64 @test_f2ll(float %a) #0 {
; CHECK: fptosi float %a to i64
; CHECK: call i64 @llvm.nvvm.f2ll.rz(float %a)
; CHECK-NOT: fptosi float %a to i64
%ret = call i64 @llvm.nvvm.f2ll.rz(float %a)
ret i64 %ret
}
; CHECK-LABEL: @test_d2ui
define i32 @test_d2ui(double %a) #0 {
; CHECK: fptoui double %a to i32
; CHECK: call i32 @llvm.nvvm.d2ui.rz(double %a)
; CHECK-NOT: fptoui double %a to i32
%ret = call i32 @llvm.nvvm.d2ui.rz(double %a)
ret i32 %ret
}
; CHECK-LABEL: @test_f2ui
define i32 @test_f2ui(float %a) #0 {
; CHECK: fptoui float %a to i32
; CHECK: call i32 @llvm.nvvm.f2ui.rz(float %a)
; CHECK-NOT: fptoui float %a to i32
%ret = call i32 @llvm.nvvm.f2ui.rz(float %a)
ret i32 %ret
}
; CHECK-LABEL: @test_d2ull
define i64 @test_d2ull(double %a) #0 {
; CHECK: fptoui double %a to i64
; CHECK: call i64 @llvm.nvvm.d2ull.rz(double %a)
; CHECK-NOT: fptoui double %a to i64
%ret = call i64 @llvm.nvvm.d2ull.rz(double %a)
ret i64 %ret
}
; CHECK-LABEL: @test_f2ull
define i64 @test_f2ull(float %a) #0 {
; CHECK: fptoui float %a to i64
; CHECK: call i64 @llvm.nvvm.f2ull.rz(float %a)
; CHECK-NOT: fptoui float %a to i64
%ret = call i64 @llvm.nvvm.f2ull.rz(float %a)
ret i64 %ret
}
Expand Down Expand Up @@ -497,4 +508,4 @@ declare float @llvm.nvvm.ui2f.rn(i32)
declare double @llvm.nvvm.ull2d.rn(i64)
declare float @llvm.nvvm.ull2f.rn(i64)
declare i32 @llvm.nvvm.fshr.clamp.i32(i32, i32, i32)
declare i32 @llvm.nvvm.fshl.clamp.i32(i32, i32, i32)
declare i32 @llvm.nvvm.fshl.clamp.i32(i32, i32, i32)
Loading