Skip to content

Commit e2c5847

Browse files
committed
[AMDGPU] Consider XOR in waterfall loop as a terminator
Ensure the XOR in the waterfall loop for indirect addressing is considered a terminator.
Differential Revision: https://reviews.llvm.org/D57703
llvm-svn: 353207
1 parent bb6d61c commit e2c5847

File tree

2 files changed

+116
-1
lines changed

2 files changed

+116
-1
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2936,7 +2936,7 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
29362936

29372937
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
29382938
MachineInstr *InsertPt =
2939-
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
2939+
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
29402940
.addReg(AMDGPU::EXEC)
29412941
.addReg(NewExec);
29422942

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
; RUN: llc -O0 -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -stop-after=regallocfast < %s | FileCheck -check-prefixes=GCN %s
3+
4+
; Verify that we consider the xor at the end of the waterfall loop emitted for
5+
; divergent indirect addressing as a terminator.
6+
7+
declare i32 @llvm.amdgcn.workitem.id.x() #1
8+
9+
; There should be no spill code inserted between the xor (S_XOR_B64_term) and the real terminator (S_CBRANCH_EXECNZ).
10+
define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
11+
; GCN-LABEL: name: extract_w_offset_vgpr
12+
; GCN: bb.0.entry:
13+
; GCN: successors: %bb.1(0x80000000)
14+
; GCN: liveins: $vgpr0, $sgpr0_sgpr1
15+
; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4)
16+
; GCN: renamable $sgpr2 = COPY renamable $sgpr1
17+
; GCN: renamable $sgpr4 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
18+
; GCN: renamable $sgpr5 = S_MOV_B32 61440
19+
; GCN: renamable $sgpr6 = S_MOV_B32 -1
20+
; GCN: undef renamable $sgpr8 = COPY killed renamable $sgpr4, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
21+
; GCN: renamable $sgpr9 = COPY killed renamable $sgpr2
22+
; GCN: renamable $sgpr10 = COPY killed renamable $sgpr6
23+
; GCN: renamable $sgpr11 = COPY killed renamable $sgpr5
24+
; GCN: renamable $sgpr2 = S_MOV_B32 16
25+
; GCN: renamable $sgpr4 = S_MOV_B32 15
26+
; GCN: renamable $sgpr5 = S_MOV_B32 14
27+
; GCN: renamable $sgpr6 = S_MOV_B32 13
28+
; GCN: renamable $sgpr7 = S_MOV_B32 12
29+
; GCN: renamable $sgpr12 = S_MOV_B32 11
30+
; GCN: renamable $sgpr13 = S_MOV_B32 10
31+
; GCN: renamable $sgpr14 = S_MOV_B32 9
32+
; GCN: renamable $sgpr15 = S_MOV_B32 8
33+
; GCN: renamable $sgpr16 = S_MOV_B32 7
34+
; GCN: renamable $sgpr17 = S_MOV_B32 6
35+
; GCN: renamable $sgpr18 = S_MOV_B32 5
36+
; GCN: renamable $sgpr19 = S_MOV_B32 3
37+
; GCN: renamable $sgpr20 = S_MOV_B32 2
38+
; GCN: renamable $sgpr21 = S_MOV_B32 1
39+
; GCN: renamable $sgpr22 = S_MOV_B32 0
40+
; GCN: renamable $vgpr1 = COPY killed renamable $sgpr22
41+
; GCN: renamable $vgpr2 = COPY killed renamable $sgpr21
42+
; GCN: renamable $vgpr3 = COPY killed renamable $sgpr20
43+
; GCN: renamable $vgpr4 = COPY killed renamable $sgpr19
44+
; GCN: renamable $vgpr5 = COPY killed renamable $sgpr18
45+
; GCN: renamable $vgpr6 = COPY killed renamable $sgpr17
46+
; GCN: renamable $vgpr7 = COPY killed renamable $sgpr16
47+
; GCN: renamable $vgpr8 = COPY killed renamable $sgpr15
48+
; GCN: renamable $vgpr9 = COPY killed renamable $sgpr14
49+
; GCN: renamable $vgpr10 = COPY killed renamable $sgpr13
50+
; GCN: renamable $vgpr11 = COPY killed renamable $sgpr12
51+
; GCN: renamable $vgpr12 = COPY killed renamable $sgpr7
52+
; GCN: renamable $vgpr13 = COPY killed renamable $sgpr6
53+
; GCN: renamable $vgpr14 = COPY killed renamable $sgpr5
54+
; GCN: renamable $vgpr15 = COPY killed renamable $sgpr4
55+
; GCN: renamable $vgpr16 = COPY killed renamable $sgpr2
56+
; GCN: undef renamable $vgpr17 = COPY killed renamable $vgpr1, implicit-def $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32
57+
; GCN: renamable $vgpr18 = COPY killed renamable $vgpr2
58+
; GCN: renamable $vgpr19 = COPY killed renamable $vgpr3
59+
; GCN: renamable $vgpr20 = COPY killed renamable $vgpr4
60+
; GCN: renamable $vgpr21 = COPY killed renamable $vgpr5
61+
; GCN: renamable $vgpr22 = COPY killed renamable $vgpr6
62+
; GCN: renamable $vgpr23 = COPY killed renamable $vgpr7
63+
; GCN: renamable $vgpr24 = COPY killed renamable $vgpr8
64+
; GCN: renamable $vgpr25 = COPY killed renamable $vgpr9
65+
; GCN: renamable $vgpr26 = COPY killed renamable $vgpr10
66+
; GCN: renamable $vgpr27 = COPY killed renamable $vgpr11
67+
; GCN: renamable $vgpr28 = COPY killed renamable $vgpr12
68+
; GCN: renamable $vgpr29 = COPY killed renamable $vgpr13
69+
; GCN: renamable $vgpr30 = COPY killed renamable $vgpr14
70+
; GCN: renamable $vgpr31 = COPY killed renamable $vgpr15
71+
; GCN: renamable $vgpr32 = COPY killed renamable $vgpr16
72+
; GCN: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec
73+
; GCN: renamable $vgpr1 = IMPLICIT_DEF
74+
; GCN: renamable $sgpr24_sgpr25 = IMPLICIT_DEF
75+
; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
76+
; GCN: SI_SPILL_S128_SAVE killed $sgpr8_sgpr9_sgpr10_sgpr11, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 16 into %stack.1, align 4, addrspace 5)
77+
; GCN: SI_SPILL_V512_SAVE killed $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 64 into %stack.2, align 4, addrspace 5)
78+
; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.3, align 4, addrspace 5)
79+
; GCN: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
80+
; GCN: SI_SPILL_S64_SAVE killed $sgpr24_sgpr25, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.5, align 4, addrspace 5)
81+
; GCN: bb.1:
82+
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
83+
; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 8 from %stack.5, align 4, addrspace 5)
84+
; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
85+
; GCN: $vgpr1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
86+
; GCN: renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
87+
; GCN: renamable $sgpr4_sgpr5 = V_CMP_EQ_U32_e64 $sgpr2, killed $vgpr1, implicit $exec
88+
; GCN: renamable $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec
89+
; GCN: S_SET_GPR_IDX_ON killed renamable $sgpr2, 1, implicit-def $m0, implicit undef $m0
90+
; GCN: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = SI_SPILL_V512_RESTORE %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 64 from %stack.2, align 4, addrspace 5)
91+
; GCN: renamable $vgpr18 = V_MOV_B32_e32 undef $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0
92+
; GCN: S_SET_GPR_IDX_OFF
93+
; GCN: renamable $vgpr19 = COPY renamable $vgpr18
94+
; GCN: renamable $sgpr6_sgpr7 = COPY renamable $sgpr4_sgpr5
95+
; GCN: SI_SPILL_S64_SAVE killed $sgpr6_sgpr7, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.5, align 4, addrspace 5)
96+
; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.6, align 4, addrspace 5)
97+
; GCN: SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
98+
; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5)
99+
; GCN: SI_SPILL_V32_SAVE killed $vgpr18, %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
100+
; GCN: $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
101+
; GCN: S_CBRANCH_EXECNZ %bb.1, implicit $exec
102+
; GCN: bb.2:
103+
; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 8 from %stack.3, align 4, addrspace 5)
104+
; GCN: $exec = S_MOV_B64 killed renamable $sgpr0_sgpr1
105+
; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
106+
; GCN: $sgpr4_sgpr5_sgpr6_sgpr7 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 16 from %stack.1, align 4, addrspace 5)
107+
; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
108+
; GCN: S_ENDPGM
109+
entry:
110+
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
111+
%index = add i32 %id, 1
112+
%value = extractelement <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, i32 %index
113+
store i32 %value, i32 addrspace(1)* %out
114+
ret void
115+
}

0 commit comments

Comments
 (0)