Skip to content

Commit 2f41fa3

Browse files
authored
[AMDGPU] Fix code object version not being set to 'none' (llvm#135036)
Summary: Previously, we removed the special handling for the code object version global. I erroneously thought that this meant we cold get rid of this weird `-Xclang` option. However, this also emits an LLVM IR module flag, which will then cause linking issues.
1 parent f2ff298 commit 2f41fa3

File tree

7 files changed

+38
-3
lines changed

7 files changed

+38
-3
lines changed

compiler-rt/cmake/builtin-config-ix.cmake

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ builtin_check_c_compiler_flag(-Wno-pedantic COMPILER_RT_HAS_WNO_PEDANTIC
2222
builtin_check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG)
2323
builtin_check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG)
2424
builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
25+
builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
2526
builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG)
2627
builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG)
2728
builtin_check_c_compiler_flag(-fcf-protection=full COMPILER_RT_HAS_FCF_PROTECTION_FLAG)

compiler-rt/lib/builtins/CMakeLists.txt

+6
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,12 @@ else ()
833833
append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto BUILTIN_CFLAGS)
834834
append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG
835835
-fconvergent-functions BUILTIN_CFLAGS)
836+
837+
# AMDGPU targets want to use a generic ABI.
838+
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
839+
append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG
840+
"SHELL:-Xclang -mcode-object-version=none" BUILTIN_CFLAGS)
841+
endif()
836842
endif()
837843

838844
set(BUILTIN_DEFS "")

libc/cmake/modules/LLVMLibCCompileOptionRules.cmake

+2
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,8 @@ function(_get_common_compile_options output_var flags)
215215
if(LIBC_CUDA_ROOT)
216216
list(APPEND compile_options "--cuda-path=${LIBC_CUDA_ROOT}")
217217
endif()
218+
elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
219+
list(APPEND compile_options "SHELL:-Xclang -mcode-object-version=none")
218220
endif()
219221
endif()
220222
set(${output_var} ${compile_options} PARENT_SCOPE)

libcxx/cmake/caches/AMDGPU.cmake

+4-2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ set(LIBCXX_TEST_CONFIG "amdgpu-libc++-shared.cfg.in" CACHE STRING "")
3232
set(LIBCXX_TEST_PARAMS "optimization=none;long_tests=False;executor=amdhsa-loader" CACHE STRING "")
3333

3434
# Necessary compile flags for AMDGPU.
35-
set(LIBCXX_ADDITIONAL_COMPILE_FLAGS "-nogpulib;-flto;-fconvergent-functions" CACHE STRING "")
36-
set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS "-nogpulib;-flto;-fconvergent-functions" CACHE STRING "")
35+
set(LIBCXX_ADDITIONAL_COMPILE_FLAGS
36+
"-nogpulib;-flto;-fconvergent-functions;SHELL:-Xclang -mcode-object-version=none" CACHE STRING "")
37+
set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS
38+
"-nogpulib;-flto;-fconvergent-functions;SHELL:-Xclang -mcode-object-version=none" CACHE STRING "")
3739
set(CMAKE_REQUIRED_FLAGS "-nogpulib" CACHE STRING "")

offload/DeviceRTL/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ function(compileDeviceRTLLibrary target_name target_triple)
255255
endfunction()
256256

257257
add_custom_target(omptarget.devicertl.amdgpu)
258-
compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa)
258+
compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa -Xclang -mcode-object-version=none)
259259

260260
add_custom_target(omptarget.devicertl.nvptx)
261261
compileDeviceRTLLibrary(nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63)

offload/DeviceRTL/src/Mapping.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@
2020

2121
using namespace ompx;
2222

23+
// FIXME: This resolves the handling for the AMDGPU workgroup size when the ABI
24+
// is set to 'none'. We only support COV5+ but this can be removed when COV4 is
25+
// fully deprecated.
26+
#ifdef __AMDGPU__
27+
extern const inline uint32_t __oclc_ABI_version = 500;
28+
[[gnu::alias("__oclc_ABI_version")]] const uint32_t __oclc_ABI_version__;
29+
#endif
30+
2331
static bool isInLastWarp() {
2432
uint32_t MainTId = (mapping::getNumberOfThreadsInBlock() - 1) &
2533
~(mapping::getWarpSize() - 1);

offload/test/api/amdgpu_code_object.c

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -Xclang \
2+
// RUN: -mcode-object-version=5
3+
// RUN: %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa
4+
5+
// REQUIRES: amdgcn-amd-amdhsa
6+
7+
#include <stdio.h>
8+
9+
// Test to make sure we can build and run with the previous COV.
10+
int main() {
11+
#pragma omp target
12+
;
13+
14+
// CHECK: PASS
15+
printf("PASS\n");
16+
}

0 commit comments

Comments
 (0)