Skip to content

Commit 6f5e993

Browse files
authored
[DirectX] legalize usub.sat (#135288)
Fixes #135285. This change implements the `usub.sat` intrinsic, which performs an unsigned saturating subtraction of its two arguments. The result is clamped to a minimum of 0.
1 parent df579ce commit 6f5e993

File tree

2 files changed

+78
-0
lines changed

2 files changed

+78
-0
lines changed

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,27 @@ static bool isIntrinsicExpansion(Function &F) {
6565
case Intrinsic::dx_sign:
6666
case Intrinsic::dx_step:
6767
case Intrinsic::dx_radians:
68+
case Intrinsic::usub_sat:
6869
case Intrinsic::vector_reduce_add:
6970
case Intrinsic::vector_reduce_fadd:
7071
return true;
7172
}
7273
return false;
7374
}
75+
76+
/// Expand a call to the `usub.sat` intrinsic into an explicit
/// compare / subtract / select sequence.
///
/// The emitted IR is equivalent to `(A < B) ? 0 : (A - B)` using an unsigned
/// comparison, so the result never wraps below zero.
///
/// \param Orig the intrinsic call; its two arguments are the operands, and
///             the new instructions are built with an IRBuilder positioned
///             at this call.
/// \returns the final select value. This function does not modify or erase
///          \p Orig itself — presumably the caller replaces its uses with the
///          returned value (not visible in this hunk; confirm in
///          expandIntrinsic).
static Value *expandUsubSat(CallInst *Orig) {
77+
Value *A = Orig->getArgOperand(0);
78+
Value *B = Orig->getArgOperand(1);
79+
// Both operands share the result type; it may be a scalar integer or a
// vector of integers (the accompanying test covers <4 x i32>).
Type *Ty = A->getType();
80+
81+
IRBuilder<> Builder(Orig);
82+
83+
// Unsigned A < B is exactly the case where A - B would wrap around.
Value *Cmp = Builder.CreateICmpULT(A, B, "usub.cmp");
84+
// Plain (wrapping) subtraction; only selected when no wrap occurred.
Value *Sub = Builder.CreateSub(A, B, "usub.sub");
85+
// Saturation floor: zero of the operand type.
Value *Zero = ConstantInt::get(Ty, 0);
86+
return Builder.CreateSelect(Cmp, Zero, Sub, "usub.sat");
87+
}
88+
7489
static Value *expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId) {
7590
assert(IntrinsicId == Intrinsic::vector_reduce_add ||
7691
IntrinsicId == Intrinsic::vector_reduce_fadd);
@@ -586,6 +601,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
586601
case Intrinsic::dx_radians:
587602
Result = expandRadiansIntrinsic(Orig);
588603
break;
604+
case Intrinsic::usub_sat:
605+
Result = expandUsubSat(Orig);
606+
break;
589607
case Intrinsic::vector_reduce_add:
590608
case Intrinsic::vector_reduce_fadd:
591609
Result = expandVecReduceAdd(Orig, IntrinsicId);

llvm/test/CodeGen/DirectX/usub_sat.ll

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
3+
4+
; Make sure llvm.usub.sat is expanded into an icmp ult, sub, and select sequence.
5+
6+
; Scalar i16 case: the llvm.usub.sat.i16 call is expected to be replaced by an
; unsigned less-than compare, a subtraction, and a select that yields 0 when
; %a < %b.
define noundef i16 @usub_sat_i16(i16 noundef %a, i16 noundef %b) {
7+
; CHECK-LABEL: define noundef i16 @usub_sat_i16(
8+
; CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) {
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: [[USUB_CMP:%.*]] = icmp ult i16 [[A]], [[B]]
11+
; CHECK-NEXT: [[USUB_SUB:%.*]] = sub i16 [[A]], [[B]]
12+
; CHECK-NEXT: [[ELT_USUB_SAT:%.*]] = select i1 [[USUB_CMP]], i16 0, i16 [[USUB_SUB]]
13+
; CHECK-NEXT: ret i16 [[ELT_USUB_SAT]]
14+
;
15+
entry:
16+
%elt.usub_sat = call i16 @llvm.usub.sat.i16(i16 %a, i16 %b)
17+
ret i16 %elt.usub_sat
18+
}
19+
20+
; Scalar i32 case: the llvm.usub.sat.i32 call is expected to be replaced by an
; unsigned less-than compare, a subtraction, and a select that yields 0 when
; %a < %b.
define noundef i32 @usub_sat_i32(i32 noundef %a, i32 noundef %b) {
21+
; CHECK-LABEL: define noundef i32 @usub_sat_i32(
22+
; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
23+
; CHECK-NEXT: [[ENTRY:.*:]]
24+
; CHECK-NEXT: [[USUB_CMP:%.*]] = icmp ult i32 [[A]], [[B]]
25+
; CHECK-NEXT: [[USUB_SUB:%.*]] = sub i32 [[A]], [[B]]
26+
; CHECK-NEXT: [[ELT_USUB_SAT:%.*]] = select i1 [[USUB_CMP]], i32 0, i32 [[USUB_SUB]]
27+
; CHECK-NEXT: ret i32 [[ELT_USUB_SAT]]
28+
;
29+
entry:
30+
%elt.usub_sat = call i32 @llvm.usub.sat.i32(i32 %a, i32 %b)
31+
ret i32 %elt.usub_sat
32+
}
33+
34+
; Scalar i64 case: the llvm.usub.sat.i64 call is expected to be replaced by an
; unsigned less-than compare, a subtraction, and a select that yields 0 when
; %a < %b.
define noundef i64 @usub_sat_i64(i64 noundef %a, i64 noundef %b) {
35+
; CHECK-LABEL: define noundef i64 @usub_sat_i64(
36+
; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) {
37+
; CHECK-NEXT: [[ENTRY:.*:]]
38+
; CHECK-NEXT: [[USUB_CMP:%.*]] = icmp ult i64 [[A]], [[B]]
39+
; CHECK-NEXT: [[USUB_SUB:%.*]] = sub i64 [[A]], [[B]]
40+
; CHECK-NEXT: [[ELT_USUB_SAT:%.*]] = select i1 [[USUB_CMP]], i64 0, i64 [[USUB_SUB]]
41+
; CHECK-NEXT: ret i64 [[ELT_USUB_SAT]]
42+
;
43+
entry:
44+
%elt.usub_sat = call i64 @llvm.usub.sat.i64(i64 %a, i64 %b)
45+
ret i64 %elt.usub_sat
46+
}
47+
48+
; Vector <4 x i32> case: the expansion is expected to operate on whole vectors,
; producing a <4 x i1> compare mask and selecting zeroinitializer for lanes
; where %a < %b.
define noundef <4 x i32> @usub_sat_vec(<4 x i32> noundef %a, <4 x i32> noundef %b) {
49+
; CHECK-LABEL: define noundef <4 x i32> @usub_sat_vec(
50+
; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
51+
; CHECK-NEXT: [[ENTRY:.*:]]
52+
; CHECK-NEXT: [[USUB_CMP:%.*]] = icmp ult <4 x i32> [[A]], [[B]]
53+
; CHECK-NEXT: [[USUB_SUB:%.*]] = sub <4 x i32> [[A]], [[B]]
54+
; CHECK-NEXT: [[ELT_USUB_SAT:%.*]] = select <4 x i1> [[USUB_CMP]], <4 x i32> zeroinitializer, <4 x i32> [[USUB_SUB]]
55+
; CHECK-NEXT: ret <4 x i32> [[ELT_USUB_SAT]]
56+
;
57+
entry:
58+
%elt.usub_sat = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
59+
ret <4 x i32> %elt.usub_sat
60+
}

0 commit comments

Comments
 (0)