Skip to content

Commit 8efe3c1

Browse files
committed
Rename flag; apply module flag only for SYCL
1 parent f6cba9c commit 8efe3c1

File tree

4 files changed

+12
-10
lines changed

4 files changed

+12
-10
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -985,10 +985,6 @@ defm cuda_prec_sqrt : BoolFOption<"cuda-prec-sqrt",
985985
PosFlag<SetTrue, [CC1Option], "Specify">,
986986
NegFlag<SetFalse, [], "Don't specify">,
987987
BothFlags<[], " that sqrt is correctly rounded (for CUDA devices)">>;
988-
defm nvvm_cuda_approx_tanhf : BoolFOption<"cuda-approx-tanhf",
989-
TargetOpts<"NVVMCudaApproxTanhf">, DefaultFalse,
990-
PosFlag<SetTrue, [CC1Option], "Only SYCL. Use the built-in fast approximation of tanhf function. Device needs to have a compute capability >= 8.0">,
991-
NegFlag<SetFalse>>;
992988
def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group<i_Group>,
993989
HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">;
994990
def hip_path_EQ : Joined<["--"], "hip-path=">, Group<i_Group>,
@@ -4730,7 +4726,10 @@ def fno_sycl_device_lib_EQ : CommaJoined<["-"], "fno-sycl-device-lib=">, Group<s
47304726
Values<"libc, libm-fp32, libm-fp64, all">, HelpText<"Control exclusion of "
47314727
"device libraries from device binary linkage. Valid arguments "
47324728
"are libc, libm-fp32, libm-fp64, all">;
4733-
4729+
defm nvvm_cuda_approx_tanh : BoolFOption<"sycl-cuda-approx-tanh",
4730+
TargetOpts<"NVVMCudaApproxTanhf">, DefaultFalse,
4731+
PosFlag<SetTrue, [CC1Option], "Use the built-in fast approximation of tanh function. Device needs to have a compute capability >= 8.0">,
4732+
NegFlag<SetFalse>>;
47344733
//===----------------------------------------------------------------------===//
47354734
// FLangOption + CoreOption + NoXarchOption
47364735
//===----------------------------------------------------------------------===//

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,9 @@ void CodeGenModule::Release() {
780780
llvm::DenormalMode::IEEE);
781781
getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-prec-sqrt",
782782
getTarget().getTargetOpts().NVVMCudaPrecSqrt);
783+
}
784+
785+
if ( LangOpts.isSYCL() && getTriple().isNVPTX()) {
783786
getModule().addModuleFlag(llvm::Module::Override,
784787
"nvvm-reflect-approx-tanhf",
785788
getTarget().getTargetOpts().NVVMCudaApproxTanhf);

clang/test/CodeGenCUDA/flush-denormals.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ extern "C" __device__ void foo() {}
4444
// FTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
4545
// NOFTZ-NOT: "denormal-fp-math-f32"
4646

47-
// PTXFTZ:!llvm.module.flags = !{{{.*}}, [[MODFLAG:![0-9]+]], {{.*}}, {{.*}}}
47+
// PTXFTZ:!llvm.module.flags = !{{{.*}}, [[MODFLAG:![0-9]+]], {{.*}}}
4848
// PTXFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1}
4949

50-
// PTXNOFTZ:!llvm.module.flags = !{{{.*}}, [[MODFLAG:![0-9]+]], {{.*}}, {{.*}}}
50+
// PTXNOFTZ:!llvm.module.flags = !{{{.*}}, [[MODFLAG:![0-9]+]], {{.*}}}
5151
// PTXNOFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 0}

clang/test/CodeGenCUDA/nvvm-reflect-approx-tanh.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-llvm -fcuda-approx-tanhf %s -o -| FileCheck --check-prefix=CHECK-ON %s
2-
// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-llvm %s -o -| FileCheck --check-prefix=CHECK-OFF %s
1+
// RUN: %clang_cc1 -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -fsycl-cuda-approx-tanh %s -o -| FileCheck --check-prefix=CHECK-ON %s
2+
// RUN: %clang_cc1 -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm %s -o -| FileCheck --check-prefix=CHECK-OFF %s
33

44
#include "Inputs/cuda.h"
55

6-
// Check that the -fcuda-approx-tanhf flag correctly sets the nvvm-reflect module flags.
6+
// Check that the -fsycl-cuda-approx-tanh flag correctly sets the nvvm-reflect module flags.
77

88
extern "C" __device__ void foo() {}
99

0 commit comments

Comments
 (0)