Skip to content

Commit 39aeab4

Browse files
committed
AMDGPU/EG: Add a new FeatureFMA and use it to selectively enable FMA instruction
Only used by pre-GCN targets. v2: fix predicate setting for FMA_Common. Differential Revision: https://reviews.llvm.org/D40692 (llvm-svn: 319712)
1 parent 9836094 commit 39aeab4

File tree

7 files changed

+31
-4
lines changed

7 files changed

+31
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@ def FeatureFP64 : SubtargetFeature<"fp64",
1919
"Enable double precision operations"
2020
>;
2121

22+
def FeatureFMA : SubtargetFeature<"fmaf",
23+
"FMA",
24+
"true",
25+
"Enable single precision FMA (not as fast as mul+add, but fused)"
26+
>;
27+
2228
def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
2329
"FastFMAF32",
2430
"true",

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ def NoFP16Denormals : Predicate<"!Subtarget->hasFP16Denormals()">;
4949
def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">;
5050
def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">;
5151
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
52+
def FMA : Predicate<"Subtarget->hasFMA()">;
5253

5354
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
5455
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
140140

141141
// Static subtarget properties set by tablegen
142142
bool FP64;
143+
bool FMA;
143144
bool IsGCN;
144145
bool GCN3Encoding;
145146
bool CIInsts;
@@ -348,6 +349,10 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
348349
return CaymanISA;
349350
}
350351

352+
bool hasFMA() const {
353+
return FMA;
354+
}
355+
351356
TrapHandlerAbi getTrapHandlerAbi() const {
352357
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
353358
}

llvm/lib/Target/AMDGPU/R600ISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,11 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
211211
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
212212
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
213213

214+
if (!Subtarget->hasFMA()) {
215+
setOperationAction(ISD::FMA, MVT::f32, Expand);
216+
setOperationAction(ISD::FMA, MVT::f64, Expand);
217+
}
218+
214219
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
215220

216221
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };

llvm/lib/Target/AMDGPU/R600Instructions.td

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -989,7 +989,10 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
989989
class FMA_Common <bits<5> inst> : R600_3OP <
990990
inst, "FMA",
991991
[(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))], VecALU
992-
>;
992+
>
993+
{
994+
let OtherPredicates = [FMA];
995+
}
993996

994997
class CNDE_Common <bits<5> inst> : R600_3OP <
995998
inst, "CNDE",

llvm/lib/Target/AMDGPU/R600Processors.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def : Processor<"cedar", R600_VLIW5_Itin,
5353
>;
5454

5555
def : Processor<"cypress", R600_VLIW5_Itin,
56-
[FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache]
56+
[FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache, FeatureFMA]
5757
>;
5858

5959
def : Processor<"juniper", R600_VLIW5_Itin,
@@ -82,7 +82,7 @@ def : Processor<"caicos", R600_VLIW5_Itin,
8282
>;
8383

8484
def : Processor<"cayman", R600_VLIW4_Itin,
85-
[FeatureNorthernIslands, FeatureCaymanISA]
85+
[FeatureNorthernIslands, FeatureCaymanISA, FeatureFMA]
8686
>;
8787

8888
def : Processor<"turks", R600_VLIW5_Itin,

llvm/test/CodeGen/AMDGPU/fma.ll

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2-
; XUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
2+
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
3+
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cedar -verify-machineinstrs < %s
4+
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=juniper -verify-machineinstrs < %s
5+
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s
6+
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=sumo -verify-machineinstrs < %s
7+
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=barts -verify-machineinstrs < %s
8+
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=caicos -verify-machineinstrs < %s
9+
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=turks -verify-machineinstrs < %s
310

411
declare float @llvm.fma.f32(float, float, float) nounwind readnone
512
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone

0 commit comments

Comments
 (0)