Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a subregclass from SGPR for the call clobbered register pairs. #160

Draft
wants to merge 1 commit into
base: amd-common
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions lib/Target/AMDGPU/SIFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,12 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
LivePhysRegs LiveRegs(*ST.getRegisterInfo());
LiveRegs.addLiveIns(MBB);

// Add the register used by the return instruction to the live set
// to avoid clobbering the registers used in the return instruction.
if (MBBI->getOpcode() == AMDGPU::S_SETPC_B64_return &&
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume it is safe to check for the opcode first, and then get the registers.
I am not sure whether I should also check that the operand is a register.

MBBI->getOperand(0).isReg()) {
LiveRegs.addReg(MBBI->getOperand(0).getReg());
}

ScratchExecCopy
= findScratchNonCalleeSaveRegister(MF, LiveRegs,
*TRI.getWaveMaskRegClass());
Expand Down
10 changes: 5 additions & 5 deletions lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2180,13 +2180,13 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// FIXME: Should be able to use a vreg here, but need a way to prevent it
// from being allocated to a CSR.

SDValue PhysReturnAddrReg = DAG.getRegister(TRI->getReturnAddressReg(MF),
MVT::i64);

Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, Flag);
SDValue ReturnAddrVirtualReg = DAG.getRegister(MF.getRegInfo().createVirtualRegister(
&AMDGPU::CCR_SGPR_64RegClass),
MVT::i64);
Chain = DAG.getCopyToReg(Chain, DL, ReturnAddrVirtualReg, ReturnAddrReg, Flag);
Flag = Chain.getValue(1);

RetOps.push_back(PhysReturnAddrReg);
RetOps.push_back(ReturnAddrVirtualReg);
}

// Copy the result values into the output registers.
Expand Down
6 changes: 6 additions & 0 deletions lib/Target/AMDGPU/SIRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,12 @@ def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32
let AllocationPriority = 8;
}

// CCR (call clobbered registers): a subclass of SGPR_64 containing the
// 64-bit SGPR pairs that are call-clobbered, so values placed here (e.g.
// the return address copy) never land in a callee-saved pair.
// NOTE(review): (trunc SGPR_64, 16) keeps only the first 16 pairs of
// SGPR_64 — confirm these are exactly the call-clobbered pairs for the
// target calling convention.
def CCR_SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, (add (trunc SGPR_64, 16))> {
// Same copy cost and allocation priority as the parent SGPR_64 class.
let CopyCost = 1;
let AllocationPriority = 8;
}

// TTMP 64-bit register pairs (presumably trap-handler temporaries — TODO
// confirm). Marked non-allocatable so the register allocator never assigns
// them.
def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add TTMP_64Regs)> {
let isAllocatable = 0;
}
Expand Down
7 changes: 6 additions & 1 deletion lib/Target/AMDGPU/SOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,11 @@ class SOP1_1 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
let has_sdst = 0;
}

// SOP1 variant with a 64-bit input constrained to CCR_SGPR_64 (a subset of
// SGPR_64) and no output; has_sdst = 0 drops the destination operand from
// the encoding.
class SOP1_64_2 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
opName, (outs), (ins CCR_SGPR_64:$src0), "$src0", pattern> {
let has_sdst = 0;
}

let isMoveImm = 1 in {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
Expand Down Expand Up @@ -224,7 +229,7 @@ def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">;

let isReturn = 1 in {
// Define variant marked as return rather than branch.
// Uses SOP1_64_2 so the $src0 return-address pair is restricted to the
// call-clobbered CCR_SGPR_64 class rather than all of SGPR_64.
def S_SETPC_B64_return : SOP1_64_2<"", [(AMDGPUret_flag i64:$src0)]>;
}
} // End isTerminator = 1, isBarrier = 1

Expand Down