Skip to content

Commit 8e8f6aa

Browse files
committed
remove clamp
1 parent 9927a43 commit 8e8f6aa

File tree

3 files changed

+12
-24
lines changed

3 files changed

+12
-24
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -419,19 +419,18 @@ bool AMDGPURegBankCombinerImpl::lowerUniformBFX(MachineInstr &MI) const {
419419
// Pack the offset and width of a BFE into
420420
// the format expected by the S_BFE_I32 / S_BFE_U32. In the second
421421
// source, bits [5:0] contain the offset and bits [22:16] the width.
422-
423-
// Ensure the high bits are clear to insert the offset.
424-
auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
425-
auto ClampOffset = B.buildAnd(S32, OffsetReg, OffsetMask);
422+
// The 64-bit variants use bits [6:0].
423+
//
424+
// If the value takes more than 5/6 bits, the G_UBFX/G_SBFX is ill-formed.
425+
// Thus, we do not clamp the values. We assume they are in range,
426+
// and if they aren't, it is UB anyway.
426427

427428
// Zeros out the low bits, so don't bother clamping the input value.
428429
auto ShiftAmt = B.buildConstant(S32, 16);
429430
auto ShiftWidth = B.buildShl(S32, WidthReg, ShiftAmt);
430431

431-
auto MergedInputs = B.buildOr(S32, ClampOffset, ShiftWidth);
432+
auto MergedInputs = B.buildOr(S32, OffsetReg, ShiftWidth);
432433

433-
MRI.setRegBank(OffsetMask.getReg(0), *RB);
434-
MRI.setRegBank(ClampOffset.getReg(0), *RB);
435434
MRI.setRegBank(ShiftAmt.getReg(0), *RB);
436435
MRI.setRegBank(ShiftWidth.getReg(0), *RB);
437436
MRI.setRegBank(MergedInputs.getReg(0), *RB);

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
1414
define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
1515
; GFX6-LABEL: s_bfe_i32_arg_arg_arg:
1616
; GFX6: ; %bb.0:
17-
; GFX6-NEXT: s_and_b32 s1, s1, 63
1817
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
1918
; GFX6-NEXT: s_or_b32 s1, s1, s2
2019
; GFX6-NEXT: s_bfe_i32 s0, s0, s1
@@ -32,7 +31,6 @@ define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i3
3231
define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
3332
; GFX6-LABEL: s_bfe_i64_arg_arg_arg:
3433
; GFX6: ; %bb.0:
35-
; GFX6-NEXT: s_and_b32 s2, s2, 63
3634
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
3735
; GFX6-NEXT: s_or_b32 s2, s2, s3
3836
; GFX6-NEXT: s_bfe_i64 s[0:1], s[0:1], s2
@@ -46,7 +44,6 @@ define amdgpu_kernel void @bfe_i32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0,
4644
; GFX6: ; %bb.0:
4745
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
4846
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
49-
; GFX6-NEXT: s_and_b32 s3, s3, 63
5047
; GFX6-NEXT: s_or_b32 s3, s3, 0x7b0000
5148
; GFX6-NEXT: s_bfe_i32 s3, s2, s3
5249
; GFX6-NEXT: s_mov_b32 s2, -1
@@ -65,7 +62,7 @@ define amdgpu_kernel void @bfe_i32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0,
6562
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
6663
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
6764
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
68-
; GFX6-NEXT: s_or_b32 s3, 59, s3
65+
; GFX6-NEXT: s_or_b32 s3, 0x7b, s3
6966
; GFX6-NEXT: s_bfe_i32 s3, s2, s3
7067
; GFX6-NEXT: s_mov_b32 s2, -1
7168
; GFX6-NEXT: v_mov_b32_e32 v0, s3
@@ -82,9 +79,8 @@ define amdgpu_kernel void @bfe_i32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1,
8279
; GFX6: ; %bb.0:
8380
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
8481
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
85-
; GFX6-NEXT: s_and_b32 s4, s2, 63
8682
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
87-
; GFX6-NEXT: s_or_b32 s3, s4, s3
83+
; GFX6-NEXT: s_or_b32 s3, s2, s3
8884
; GFX6-NEXT: s_bfe_i32 s3, 0x7b, s3
8985
; GFX6-NEXT: s_mov_b32 s2, -1
9086
; GFX6-NEXT: v_mov_b32_e32 v0, s3
@@ -120,7 +116,6 @@ define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(ptr addrspace(1) %out,
120116
; GFX6: ; %bb.0:
121117
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
122118
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
123-
; GFX6-NEXT: s_and_b32 s3, s3, 63
124119
; GFX6-NEXT: s_bfe_i32 s3, s2, s3
125120
; GFX6-NEXT: s_mov_b32 s2, -1
126121
; GFX6-NEXT: v_mov_b32_e32 v0, s3

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
1414
define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
1515
; GFX6-LABEL: s_bfe_i32_arg_arg_arg:
1616
; GFX6: ; %bb.0:
17-
; GFX6-NEXT: s_and_b32 s1, s1, 63
1817
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
1918
; GFX6-NEXT: s_or_b32 s1, s1, s2
2019
; GFX6-NEXT: s_bfe_u32 s0, s0, s1
@@ -32,7 +31,6 @@ define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i3
3231
define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
3332
; GFX6-LABEL: s_bfe_i64_arg_arg_arg:
3433
; GFX6: ; %bb.0:
35-
; GFX6-NEXT: s_and_b32 s2, s2, 63
3634
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
3735
; GFX6-NEXT: s_or_b32 s2, s2, s3
3836
; GFX6-NEXT: s_bfe_u64 s[0:1], s[0:1], s2
@@ -46,9 +44,8 @@ define amdgpu_kernel void @bfe_u32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0,
4644
; GFX6: ; %bb.0:
4745
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
4846
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
49-
; GFX6-NEXT: s_and_b32 s4, s3, 63
50-
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
51-
; GFX6-NEXT: s_or_b32 s3, s4, s3
47+
; GFX6-NEXT: s_lshl_b32 s4, s3, 16
48+
; GFX6-NEXT: s_or_b32 s3, s3, s4
5249
; GFX6-NEXT: s_bfe_u32 s3, s2, s3
5350
; GFX6-NEXT: s_mov_b32 s2, -1
5451
; GFX6-NEXT: v_mov_b32_e32 v0, s3
@@ -65,7 +62,6 @@ define amdgpu_kernel void @bfe_u32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0,
6562
; GFX6: ; %bb.0:
6663
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
6764
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
68-
; GFX6-NEXT: s_and_b32 s3, s3, 63
6965
; GFX6-NEXT: s_or_b32 s3, s3, 0x7b0000
7066
; GFX6-NEXT: s_bfe_u32 s3, s2, s3
7167
; GFX6-NEXT: s_mov_b32 s2, -1
@@ -84,7 +80,7 @@ define amdgpu_kernel void @bfe_u32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0,
8480
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
8581
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
8682
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
87-
; GFX6-NEXT: s_or_b32 s3, 59, s3
83+
; GFX6-NEXT: s_or_b32 s3, 0x7b, s3
8884
; GFX6-NEXT: s_bfe_u32 s3, s2, s3
8985
; GFX6-NEXT: s_mov_b32 s2, -1
9086
; GFX6-NEXT: v_mov_b32_e32 v0, s3
@@ -101,9 +97,8 @@ define amdgpu_kernel void @bfe_u32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1,
10197
; GFX6: ; %bb.0:
10298
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
10399
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
104-
; GFX6-NEXT: s_and_b32 s4, s2, 63
105100
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
106-
; GFX6-NEXT: s_or_b32 s3, s4, s3
101+
; GFX6-NEXT: s_or_b32 s3, s2, s3
107102
; GFX6-NEXT: s_bfe_u32 s3, 0x7b, s3
108103
; GFX6-NEXT: s_mov_b32 s2, -1
109104
; GFX6-NEXT: v_mov_b32_e32 v0, s3
@@ -120,7 +115,6 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(ptr addrspace(1) %out,
120115
; GFX6: ; %bb.0:
121116
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
122117
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
123-
; GFX6-NEXT: s_and_b32 s3, s3, 63
124118
; GFX6-NEXT: s_bfe_u32 s3, s2, s3
125119
; GFX6-NEXT: s_mov_b32 s2, -1
126120
; GFX6-NEXT: v_mov_b32_e32 v0, s3

0 commit comments

Comments
 (0)