Skip to content

Commit bd6df0f

Browse files
authored
Reapply "[LLVM] Make the GPU loader utilities an LLVM tool (llvm#132096)" (llvm#132277)
Summary: There were a few issues with the first attempt, leading to some errors and warnings. Most importantly, it was being built on MSVC, which isn't supported.
1 parent 4a2ab3a commit bd6df0f

File tree

15 files changed

+228
-236
lines changed

15 files changed

+228
-236
lines changed

libc/CMakeLists.txt

-7
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,6 @@ set(LIBC_NAMESPACE ${default_namespace}
5959
CACHE STRING "The namespace to use to enclose internal implementations. Must start with '__llvm_libc'."
6060
)
6161

62-
# We will build the GPU utilities if we are not doing a runtimes build.
63-
option(LIBC_BUILD_GPU_LOADER "Always build the GPU loader utilities" OFF)
64-
if(LIBC_BUILD_GPU_LOADER OR ((NOT LLVM_RUNTIMES_BUILD) AND LLVM_LIBC_GPU_BUILD))
65-
add_subdirectory(utils/gpu)
66-
return()
67-
endif()
68-
6962
option(LIBC_CMAKE_VERBOSE_LOGGING
7063
"Log details warnings and notifications during CMake configuration." OFF)
7164

libc/src/__support/RPC/rpc_server.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@
2020
#define __has_builtin(x) 0
2121
#endif
2222

23+
// Workaround for compilers older than GCC 10, which lack __builtin_is_constant_evaluated.
24+
#ifndef __builtin_is_constant_evaluated
25+
#define __builtin_is_constant_evaluated(x) 0
26+
#endif
27+
2328
// Configs for using the LLVM libc writer interface.
2429
#define LIBC_COPT_USE_C_ASSERT
2530
#define LIBC_COPT_MEMCPY_USE_EMBEDDED_TINY
@@ -28,7 +33,7 @@
2833
#define LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
2934
#define LIBC_COPT_PRINTF_DISABLE_STRERROR
3035

31-
// The 'long double' type is 8 byte
36+
// The 'long double' type is 8 bytes.
3237
#define LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64
3338

3439
#include "shared/rpc.h"

libc/utils/gpu/CMakeLists.txt

-1
This file was deleted.

libc/utils/gpu/loader/CMakeLists.txt

-54
This file was deleted.

libc/utils/gpu/loader/amdgpu/CMakeLists.txt

-10
This file was deleted.

libc/utils/gpu/loader/nvptx/CMakeLists.txt

-9
This file was deleted.

llvm/CMakeLists.txt

-4
Original file line numberDiff line numberDiff line change
@@ -210,10 +210,6 @@ if("${LIBC_TARGET_TRIPLE}" STREQUAL "amdgcn-amd-amdhsa" OR
210210
"${LIBC_TARGET_TRIPLE}" STREQUAL "nvptx64-nvidia-cuda")
211211
set(LLVM_LIBC_GPU_BUILD ON)
212212
endif()
213-
if (NOT "libc" IN_LIST LLVM_ENABLE_PROJECTS AND LLVM_LIBC_GPU_BUILD)
214-
message(STATUS "Enabling libc project to build libc testing tools")
215-
list(APPEND LLVM_ENABLE_PROJECTS "libc")
216-
endif()
217213

218214
# LLVM_ENABLE_PROJECTS_USED is `ON` if the user has ever used the
219215
# `LLVM_ENABLE_PROJECTS` CMake cache variable. This exists for

llvm/runtimes/CMakeLists.txt

-14
Original file line numberDiff line numberDiff line change
@@ -534,20 +534,6 @@ if(build_runtimes)
534534
endif()
535535
if(LLVM_LIBC_GPU_BUILD)
536536
list(APPEND extra_cmake_args "-DLLVM_LIBC_GPU_BUILD=ON")
537-
if("libc" IN_LIST RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES)
538-
if(TARGET amdhsa-loader)
539-
list(APPEND extra_cmake_args
540-
"-DRUNTIMES_amdgcn-amd-amdhsa_LIBC_GPU_LOADER_EXECUTABLE=$<TARGET_FILE:amdhsa-loader>")
541-
list(APPEND extra_deps amdhsa-loader)
542-
endif()
543-
endif()
544-
if("libc" IN_LIST RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES)
545-
if(TARGET nvptx-loader)
546-
list(APPEND extra_cmake_args
547-
"-DRUNTIMES_nvptx64-nvidia-cuda_LIBC_GPU_LOADER_EXECUTABLE=$<TARGET_FILE:nvptx-loader>")
548-
list(APPEND extra_deps nvptx-loader)
549-
endif()
550-
endif()
551537
if(TARGET clang-offload-packager)
552538
list(APPEND extra_deps clang-offload-packager)
553539
endif()

llvm/tools/CMakeLists.txt

+4
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
# traversing each directory.
1010
create_llvm_tool_options()
1111

12+
if(NOT LLVM_COMPILER_IS_GCC_COMPATIBLE)
13+
set(LLVM_TOOL_LLVM_GPU_LOADER_BUILD OFF)
14+
endif()
15+
1216
if(NOT LLVM_BUILD_LLVM_DYLIB AND NOT LLVM_BUILD_LLVM_C_DYLIB)
1317
set(LLVM_TOOL_LLVM_SHLIB_BUILD Off)
1418
endif()
+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
set(LLVM_LINK_COMPONENTS
2+
BinaryFormat
3+
Object
4+
Option
5+
Support
6+
FrontendOffloading
7+
TargetParser
8+
)
9+
10+
add_llvm_tool(llvm-gpu-loader
11+
llvm-gpu-loader.cpp
12+
13+
# TODO: We intentionally split this currently due to statically linking the
14+
# GPU runtimes. Dynamically load the dependencies, possibly using the
15+
# LLVM offloading API when it is complete.
16+
PARTIAL_SOURCES_INTENDED
17+
18+
DEPENDS
19+
intrinsics_gen
20+
)
21+
22+
# Locate the RPC server handling interface.
23+
include(FindLibcCommonUtils)
24+
target_link_libraries(llvm-gpu-loader PUBLIC llvm-libc-common-utilities)
25+
26+
# Check for HSA support for targeting AMD GPUs.
27+
find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
28+
if(hsa-runtime64_FOUND)
29+
target_sources(llvm-gpu-loader PRIVATE amdhsa.cpp)
30+
target_compile_definitions(llvm-gpu-loader PRIVATE AMDHSA_SUPPORT)
31+
target_link_libraries(llvm-gpu-loader PRIVATE hsa-runtime64::hsa-runtime64)
32+
33+
# Compatibility with the old amdhsa-loader name.
34+
add_llvm_tool_symlink(amdhsa-loader llvm-gpu-loader)
35+
endif()
36+
37+
# Check for CUDA support for targeting NVIDIA GPUs.
38+
find_package(CUDAToolkit 11.2 QUIET)
39+
if(CUDAToolkit_FOUND)
40+
target_sources(llvm-gpu-loader PRIVATE nvptx.cpp)
41+
target_compile_definitions(llvm-gpu-loader PRIVATE NVPTX_SUPPORT)
42+
target_link_libraries(llvm-gpu-loader PRIVATE CUDA::cuda_driver)
43+
44+
# Compatibility with the old nvptx-loader name.
45+
add_llvm_tool_symlink(nvptx-loader llvm-gpu-loader)
46+
endif()

libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp renamed to llvm/tools/llvm-gpu-loader/amdhsa.cpp

+7-7
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
//
1414
//===----------------------------------------------------------------------===//
1515

16-
#include "Loader.h"
16+
#include "llvm-gpu-loader.h"
17+
#include "server.h"
1718

1819
#include "hsa/hsa.h"
1920
#include "hsa/hsa_ext_amd.h"
@@ -260,9 +261,8 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
260261
// Register RPC callbacks for the malloc and free functions on HSA.
261262
auto malloc_handler = [&](size_t size) -> void * {
262263
void *dev_ptr = nullptr;
263-
if (hsa_status_t err =
264-
hsa_amd_memory_pool_allocate(coarsegrained_pool, size,
265-
/*flags=*/0, &dev_ptr))
264+
if (hsa_amd_memory_pool_allocate(coarsegrained_pool, size,
265+
/*flags=*/0, &dev_ptr))
266266
dev_ptr = nullptr;
267267
hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_ptr);
268268
return dev_ptr;
@@ -330,9 +330,9 @@ static hsa_status_t hsa_memcpy(void *dst, hsa_agent_t dst_agent,
330330
return HSA_STATUS_SUCCESS;
331331
}
332332

333-
int load(int argc, const char **argv, const char **envp, void *image,
334-
size_t size, const LaunchParameters &params,
335-
bool print_resource_usage) {
333+
int load_amdhsa(int argc, const char **argv, const char **envp, void *image,
334+
size_t size, const LaunchParameters &params,
335+
bool print_resource_usage) {
336336
// Initialize the HSA runtime used to communicate with the device.
337337
if (hsa_status_t err = hsa_init())
338338
handle_error(err);

libc/utils/gpu/loader/Main.cpp renamed to llvm/tools/llvm-gpu-loader/llvm-gpu-loader.cpp

+43-30
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,25 @@
66
//
77
//===----------------------------------------------------------------------===//
88
//
9-
// This file opens a device image passed on the command line and passes it to
10-
// one of the loader implementations for launch.
9+
// This utility is used to launch standard programs onto the GPU in conjunction
10+
// with the LLVM 'libc' project. It is designed to mimic a standard emulator
11+
// workflow, allowing for unit tests to be run on the GPU directly.
1112
//
1213
//===----------------------------------------------------------------------===//
1314

14-
#include "Loader.h"
15+
#include "llvm-gpu-loader.h"
1516

1617
#include "llvm/BinaryFormat/Magic.h"
18+
#include "llvm/Object/ELF.h"
19+
#include "llvm/Object/ELFObjectFile.h"
1720
#include "llvm/Support/CommandLine.h"
1821
#include "llvm/Support/Error.h"
1922
#include "llvm/Support/FileSystem.h"
2023
#include "llvm/Support/MemoryBuffer.h"
2124
#include "llvm/Support/Path.h"
2225
#include "llvm/Support/Signals.h"
2326
#include "llvm/Support/WithColor.h"
27+
#include "llvm/TargetParser/Triple.h"
2428

2529
#include <cerrno>
2630
#include <cstdio>
@@ -67,12 +71,6 @@ static cl::opt<bool>
6771
cl::desc("Output resource usage of launched kernels"),
6872
cl::init(false), cl::cat(loader_category));
6973

70-
static cl::opt<bool>
71-
no_parallelism("no-parallelism",
72-
cl::desc("Allows only a single process to use the GPU at a "
73-
"time. Useful to suppress out-of-resource errors"),
74-
cl::init(false), cl::cat(loader_category));
75-
7674
static cl::opt<std::string> file(cl::Positional, cl::Required,
7775
cl::desc("<gpu executable>"),
7876
cl::cat(loader_category));
@@ -115,27 +113,42 @@ int main(int argc, const char **argv, const char **envp) {
115113
llvm::transform(args, std::back_inserter(new_argv),
116114
[](const std::string &arg) { return arg.c_str(); });
117115

118-
// Claim a file lock on the executable so only a single process can enter this
119-
// region if requested. This prevents the loader from spurious failures.
120-
int fd = -1;
121-
if (no_parallelism) {
122-
fd = open(get_main_executable(argv[0]).c_str(), O_RDONLY);
123-
if (flock(fd, LOCK_EX) == -1)
124-
report_error(createStringError("Failed to lock '%s': %s", argv[0],
125-
strerror(errno)));
126-
}
127-
128-
// Drop the loader from the program arguments.
129-
LaunchParameters params{threads_x, threads_y, threads_z,
130-
blocks_x, blocks_y, blocks_z};
131-
int ret = load(new_argv.size(), new_argv.data(), envp,
132-
const_cast<char *>(image.getBufferStart()),
133-
image.getBufferSize(), params, print_resource_usage);
134-
135-
if (no_parallelism) {
136-
if (flock(fd, LOCK_UN) == -1)
137-
report_error(createStringError("Failed to unlock '%s': %s", argv[0],
138-
strerror(errno)));
116+
Expected<llvm::object::ELF64LEObjectFile> elf_or_err =
117+
llvm::object::ELF64LEObjectFile::create(image);
118+
if (!elf_or_err)
119+
report_error(elf_or_err.takeError());
120+
121+
int ret = 1;
122+
if (elf_or_err->getArch() == Triple::amdgcn) {
123+
#ifdef AMDHSA_SUPPORT
124+
LaunchParameters params{threads_x, threads_y, threads_z,
125+
blocks_x, blocks_y, blocks_z};
126+
127+
ret = load_amdhsa(new_argv.size(), new_argv.data(), envp,
128+
const_cast<char *>(image.getBufferStart()),
129+
image.getBufferSize(), params, print_resource_usage);
130+
#else
131+
report_error(createStringError(
132+
"Unsupported architecture; %s",
133+
Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin()));
134+
#endif
135+
} else if (elf_or_err->getArch() == Triple::nvptx64) {
136+
#ifdef NVPTX_SUPPORT
137+
LaunchParameters params{threads_x, threads_y, threads_z,
138+
blocks_x, blocks_y, blocks_z};
139+
140+
ret = load_nvptx(new_argv.size(), new_argv.data(), envp,
141+
const_cast<char *>(image.getBufferStart()),
142+
image.getBufferSize(), params, print_resource_usage);
143+
#else
144+
report_error(createStringError(
145+
"Unsupported architecture; %s",
146+
Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin()));
147+
#endif
148+
} else {
149+
report_error(createStringError(
150+
"Unsupported architecture; %s",
151+
Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin()));
139152
}
140153

141154
return ret;

0 commit comments

Comments
 (0)