Skip to content

Commit d97f25b

Browse files
authored
[AMDGPU] Emit s_singleuse_vdst instructions when a register is used multiple times in the same instruction. (llvm#89601)
Previously, when a register appeared more than once within a single instruction, each appearance was counted as a separate use. This has been corrected so that such a register is counted as a single use, as per the specification, which allows for more optimisation candidates.
1 parent 721c31e commit d97f25b

File tree

2 files changed

+52
-24
lines changed

2 files changed

+52
-24
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -83,37 +83,42 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
8383
// instruction to be marked as a single use producer.
8484
bool AllProducerOperandsAreSingleUse = true;
8585

86-
for (const auto &Operand : MI.operands()) {
87-
if (!Operand.isReg())
88-
continue;
89-
const auto Reg = Operand.getReg();
90-
91-
// Count the number of times each register is read.
92-
if (Operand.readsReg())
93-
for (const MCRegUnit &Unit : TRI->regunits(Reg))
94-
RegisterUseCount[Unit]++;
95-
96-
// Do not attempt to optimise across exec mask changes.
97-
if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
98-
for (auto &UsedReg : RegisterUseCount)
99-
UsedReg.second = 2;
100-
}
86+
// Gather a list of Registers used before updating use counts to avoid
87+
// double counting registers that appear multiple times in a single
88+
// MachineInstr.
89+
SmallVector<MCRegUnit> RegistersUsed;
10190

102-
// If we are at the point where the register first became live,
103-
// check if the operands are single use.
104-
if (!MI.modifiesRegister(Reg, TRI))
105-
continue;
91+
for (const auto &Operand : MI.all_defs()) {
92+
const auto Reg = Operand.getReg();
10693

10794
const auto RegUnits = TRI->regunits(Reg);
108-
if (any_of(RegUnits, [&RegisterUseCount](const MCRegUnit &Unit) {
95+
if (any_of(RegUnits, [&RegisterUseCount](const MCRegUnit Unit) {
10996
return RegisterUseCount[Unit] > 1;
11097
}))
11198
AllProducerOperandsAreSingleUse = false;
11299

113100
// Reset uses count when a register is no longer live.
114-
for (const MCRegUnit &Unit : RegUnits)
101+
for (const MCRegUnit Unit : RegUnits)
115102
RegisterUseCount.erase(Unit);
116103
}
104+
105+
for (const auto &Operand : MI.all_uses()) {
106+
const auto Reg = Operand.getReg();
107+
108+
// Count the number of times each register is read.
109+
for (const MCRegUnit Unit : TRI->regunits(Reg)) {
110+
if (!is_contained(RegistersUsed, Unit))
111+
RegistersUsed.push_back(Unit);
112+
}
113+
}
114+
for (const MCRegUnit Unit : RegistersUsed)
115+
RegisterUseCount[Unit]++;
116+
117+
// Do not attempt to optimise across exec mask changes.
118+
if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
119+
for (auto &UsedReg : RegisterUseCount)
120+
UsedReg.second = 2;
121+
}
117122
if (AllProducerOperandsAreSingleUse && SIInstrInfo::isVALU(MI)) {
118123
// TODO: Replace with candidate logging for instruction grouping
119124
// later.

llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ body: |
136136
; CHECK-NEXT: successors: %bb.1(0x80000000)
137137
; CHECK-NEXT: liveins: $vgpr0
138138
; CHECK-NEXT: {{ $}}
139+
; CHECK-NEXT: S_SINGLEUSE_VDST 1
139140
; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
140141
; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
141142
; CHECK-NEXT: {{ $}}
@@ -278,6 +279,31 @@ body: |
278279
liveins: $vgpr2, $vgpr3
279280
...
280281

282+
# Second use is an instruction that reads and writes v1.
283+
---
284+
name: multiple_uses_4
285+
tracksRegLiveness: true
286+
body: |
287+
; CHECK-LABEL: name: multiple_uses_4
288+
; CHECK: bb.0:
289+
; CHECK-NEXT: successors: %bb.1(0x80000000)
290+
; CHECK-NEXT: liveins: $vgpr0
291+
; CHECK-NEXT: {{ $}}
292+
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
293+
; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
294+
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
295+
; CHECK-NEXT: {{ $}}
296+
; CHECK-NEXT: bb.1:
297+
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
298+
bb.0:
299+
liveins: $vgpr0
300+
$vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
301+
$vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
302+
$vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
303+
bb.1:
304+
liveins: $vgpr0, $vgpr1, $vgpr2
305+
...
306+
281307
# Results are live-in to another basic block.
282308
---
283309
name: basic_block_1
@@ -398,7 +424,6 @@ body: |
398424
; CHECK-NEXT: successors: %bb.1(0x80000000)
399425
; CHECK-NEXT: liveins: $sgpr0_sgpr1
400426
; CHECK-NEXT: {{ $}}
401-
; CHECK-NEXT: S_SINGLEUSE_VDST 1
402427
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
403428
; CHECK-NEXT: $exec = COPY $sgpr0_sgpr1
404429
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
@@ -424,7 +449,6 @@ body: |
424449
; CHECK-NEXT: successors: %bb.1(0x80000000)
425450
; CHECK-NEXT: liveins: $sgpr0
426451
; CHECK-NEXT: {{ $}}
427-
; CHECK-NEXT: S_SINGLEUSE_VDST 1
428452
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
429453
; CHECK-NEXT: $exec_lo = COPY $sgpr0
430454
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
@@ -450,7 +474,6 @@ body: |
450474
; CHECK-NEXT: successors: %bb.1(0x80000000)
451475
; CHECK-NEXT: liveins: $sgpr0
452476
; CHECK-NEXT: {{ $}}
453-
; CHECK-NEXT: S_SINGLEUSE_VDST 1
454477
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
455478
; CHECK-NEXT: $exec_hi = COPY $sgpr0
456479
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec

0 commit comments

Comments
 (0)