Skip to content

Commit 29dfd03

Browse files
[SYCL] Get rid of device kernel info duplication (#20319)
With the introduction of DeviceKernelInfo, implicit local argument information is now duplicated in program manager. This patch removes the duplicate map and makes it so that device kernel info map is filled out during image registration, with the compile time information added when it's available (during the first submission of the kernel). Additinally, this patch ensures that device kernel info instances are added to the program manager only during image registration, and gets rid of entries from other sources that polluted the map.
1 parent 1ad5171 commit 29dfd03

File tree

13 files changed

+87
-74
lines changed

13 files changed

+87
-74
lines changed

sycl/source/detail/device_kernel_info.cpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,7 @@ inline namespace _V1 {
1313
namespace detail {
1414

1515
DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info)
16-
: CompileTimeKernelInfoTy(Info) {
17-
init(Name.data());
18-
}
19-
20-
void DeviceKernelInfo::init(std::string_view KernelName) {
21-
auto &PM = detail::ProgramManager::getInstance();
22-
MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName);
23-
}
16+
: CompileTimeKernelInfoTy(Info) {}
2417

2518
template <typename OtherTy>
2619
inline constexpr bool operator==(const CompileTimeKernelInfoTy &LHS,
@@ -50,6 +43,10 @@ void DeviceKernelInfo::setCompileTimeInfoIfNeeded(
5043
assert(Info == *this);
5144
}
5245

46+
void DeviceKernelInfo::setImplicitLocalArgPos(int Pos) {
47+
assert(!MImplicitLocalArgPos.has_value() || MImplicitLocalArgPos == Pos);
48+
MImplicitLocalArgPos = Pos;
49+
}
5350
} // namespace detail
5451
} // namespace _V1
5552
} // namespace sycl

sycl/source/detail/device_kernel_info.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,10 +96,14 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy {
9696

9797
FastKernelSubcacheT &getKernelSubcache() { return MFastKernelSubcache; }
9898

99-
std::optional<int> getImplicitLocalArgPos() const {
99+
const std::optional<int> &getImplicitLocalArgPos() const {
100100
return MImplicitLocalArgPos;
101101
}
102102

103+
// Implicit local argument position is used only for some backends, so this
104+
// function allows setting it as more images are added.
105+
void setImplicitLocalArgPos(int Pos);
106+
103107
private:
104108
bool isCompileTimeInfoSet() const { return KernelSize != 0; }
105109

sycl/source/detail/get_device_kernel_info.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ inline namespace _V1 {
1616
namespace detail {
1717

1818
DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) {
19-
return ProgramManager::getInstance().getOrCreateDeviceKernelInfo(Info);
19+
return ProgramManager::getInstance().getDeviceKernelInfo(Info);
2020
}
2121

2222
} // namespace detail

sycl/source/detail/kernel_impl.cpp

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ kernel_impl::kernel_impl(Managed<ur_kernel_handle_t> &&Kernel,
3131
MCreatedFromSource(true),
3232
MKernelBundleImpl(KernelBundleImpl ? KernelBundleImpl->shared_from_this()
3333
: nullptr),
34-
MIsInterop(true), MKernelArgMaskPtr{ArgMask},
35-
MInteropDeviceKernelInfo(createCompileTimeKernelInfo(getName())) {
34+
MIsInterop(true), MKernelArgMaskPtr{ArgMask}, MOwnsDeviceKernelInfo(true),
35+
MDeviceKernelInfo(createCompileTimeKernelInfo(getName())) {
3636
ur_context_handle_t UrContext = nullptr;
3737
// Using the adapter from the passed ContextImpl
3838
getAdapter().call<UrApiKind::urKernelGetInfo>(
@@ -59,9 +59,11 @@ kernel_impl::kernel_impl(Managed<ur_kernel_handle_t> &&Kernel,
5959
MKernelBundleImpl(KernelBundleImpl.shared_from_this()),
6060
MIsInterop(MDeviceImageImpl->getOriginMask() & ImageOriginInterop),
6161
MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex},
62-
MInteropDeviceKernelInfo(MIsInterop
63-
? createCompileTimeKernelInfo(getName())
64-
: createCompileTimeKernelInfo()) {
62+
MOwnsDeviceKernelInfo(checkOwnsDeviceKernelInfo()),
63+
MDeviceKernelInfo(MOwnsDeviceKernelInfo
64+
? createCompileTimeKernelInfo(getName())
65+
: createCompileTimeKernelInfo()) {
66+
6567
// Enable USM indirect access for interop and non-sycl-jit source kernels.
6668
// sycl-jit kernels will enable this if needed through the regular kernel
6769
// path.
@@ -121,6 +123,16 @@ std::string_view kernel_impl::getName() const {
121123
return MName;
122124
}
123125

126+
bool kernel_impl::checkOwnsDeviceKernelInfo() {
127+
// If the image originates from something other than standard offline
128+
// compilation, this kernel needs to own its info structure.
129+
// We could also have a mixed origin image, in which case the device kernel
130+
// info might reside in program manager.
131+
return MDeviceImageImpl->getOriginMask() != ImageOriginSYCLOffline &&
132+
(!(MDeviceImageImpl->getOriginMask() & ImageOriginSYCLOffline) ||
133+
!ProgramManager::getInstance().tryGetDeviceKernelInfo(getName()));
134+
}
135+
124136
bool kernel_impl::isBuiltInKernel(device_impl &Device) const {
125137
auto BuiltInKernels = Device.get_info<info::device::built_in_kernel_ids>();
126138
if (BuiltInKernels.empty())

sycl/source/detail/kernel_impl.hpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -235,10 +235,11 @@ class kernel_impl {
235235
std::mutex *getCacheMutex() const { return MCacheMutex; }
236236
std::string_view getName() const;
237237

238+
bool checkOwnsDeviceKernelInfo();
238239
DeviceKernelInfo &getDeviceKernelInfo() {
239-
return MIsInterop
240-
? MInteropDeviceKernelInfo
241-
: ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
240+
return MOwnsDeviceKernelInfo
241+
? MDeviceKernelInfo
242+
: ProgramManager::getInstance().getDeviceKernelInfo(
242243
std::string_view(getName()));
243244
}
244245

@@ -255,9 +256,11 @@ class kernel_impl {
255256
std::mutex *MCacheMutex = nullptr;
256257
mutable std::string MName;
257258

258-
// It is used for the interop kernels only.
259+
// Used for images that aren't obtained with standard SYCL offline
260+
// compilation.
259261
// For regular kernel we get DeviceKernelInfo from the ProgramManager.
260-
DeviceKernelInfo MInteropDeviceKernelInfo;
262+
bool MOwnsDeviceKernelInfo = false;
263+
DeviceKernelInfo MDeviceKernelInfo;
261264

262265
bool isBuiltInKernel(device_impl &Device) const;
263266
void checkIfValidForNumArgsInfoQuery() const;

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1510,27 +1510,34 @@ void ProgramManager::cacheKernelImplicitLocalArg(
15101510
Img.getImplicitLocalArg();
15111511
if (ImplicitLocalArgRange.isAvailable())
15121512
for (auto Prop : ImplicitLocalArgRange) {
1513-
m_KernelImplicitLocalArgPos[Prop->Name] =
1514-
DeviceBinaryProperty(Prop).asUint32();
1513+
auto It = m_DeviceKernelInfoMap.find(Prop->Name);
1514+
assert(It != m_DeviceKernelInfoMap.end());
1515+
It->second.setImplicitLocalArgPos(DeviceBinaryProperty(Prop).asUint32());
15151516
}
15161517
}
15171518

1518-
DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo(
1519-
const CompileTimeKernelInfoTy &Info) {
1519+
DeviceKernelInfo &
1520+
ProgramManager::getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) {
15201521
std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);
1521-
auto [Iter, Inserted] = m_DeviceKernelInfoMap.try_emplace(Info.Name, Info);
1522-
if (!Inserted)
1523-
Iter->second.setCompileTimeInfoIfNeeded(Info);
1524-
return Iter->second;
1522+
auto It = m_DeviceKernelInfoMap.find(Info.Name);
1523+
assert(It != m_DeviceKernelInfoMap.end());
1524+
It->second.setCompileTimeInfoIfNeeded(Info);
1525+
return It->second;
15251526
}
15261527

15271528
DeviceKernelInfo &
1528-
ProgramManager::getOrCreateDeviceKernelInfo(std::string_view KernelName) {
1529+
ProgramManager::getDeviceKernelInfo(std::string_view KernelName) {
15291530
std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);
1530-
CompileTimeKernelInfoTy DefaultCompileTimeInfo{KernelName};
1531-
auto Result =
1532-
m_DeviceKernelInfoMap.try_emplace(KernelName, DefaultCompileTimeInfo);
1533-
return Result.first->second;
1531+
auto It = m_DeviceKernelInfoMap.find(KernelName);
1532+
assert(It != m_DeviceKernelInfoMap.end());
1533+
return It->second;
1534+
}
1535+
1536+
DeviceKernelInfo *
1537+
ProgramManager::tryGetDeviceKernelInfo(std::string_view KernelName) {
1538+
std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);
1539+
auto It = m_DeviceKernelInfoMap.find(KernelName);
1540+
return It != m_DeviceKernelInfoMap.end() ? &It->second : nullptr;
15341541
}
15351542

15361543
static bool isBfloat16DeviceLibImage(sycl_device_binary RawImg,
@@ -1731,6 +1738,10 @@ void ProgramManager::addImage(sycl_device_binary RawImg,
17311738
m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
17321739
KernelIDs->push_back(It->second);
17331740

1741+
CompileTimeKernelInfoTy DefaultCompileTimeInfo{std::string_view(name)};
1742+
m_DeviceKernelInfoMap.try_emplace(std::string_view(name),
1743+
DefaultCompileTimeInfo);
1744+
17341745
// Keep track of image to kernel name reference count for cleanup.
17351746
m_KernelNameRefCount[name]++;
17361747
}
@@ -1922,7 +1933,6 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) {
19221933
if (--RefCount == 0) {
19231934
// TODO aggregate all these maps into a single one since their entries
19241935
// share lifetime.
1925-
m_KernelImplicitLocalArgPos.erase(Name);
19261936
m_DeviceKernelInfoMap.erase(Name);
19271937
m_KernelNameRefCount.erase(RefCountIt);
19281938
if (Name2IDIt != m_KernelName2KernelIDs.end())

sycl/source/detail/program_manager/program_manager.hpp

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -381,17 +381,11 @@ class ProgramManager {
381381

382382
SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; }
383383

384-
std::optional<int>
385-
kernelImplicitLocalArgPos(std::string_view KernelName) const {
386-
auto it = m_KernelImplicitLocalArgPos.find(KernelName);
387-
if (it != m_KernelImplicitLocalArgPos.end())
388-
return it->second;
389-
return {};
390-
}
384+
void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img);
391385

392-
DeviceKernelInfo &
393-
getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info);
394-
DeviceKernelInfo &getOrCreateDeviceKernelInfo(std::string_view KernelName);
386+
DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info);
387+
DeviceKernelInfo &getDeviceKernelInfo(std::string_view KernelName);
388+
DeviceKernelInfo *tryGetDeviceKernelInfo(std::string_view KernelName);
395389

396390
std::set<const RTDeviceBinaryImage *>
397391
getRawDeviceImages(const std::vector<kernel_id> &KernelIDs);
@@ -420,9 +414,6 @@ class ProgramManager {
420414
/// Dumps image to current directory
421415
void dumpImage(const RTDeviceBinaryImage &Img, uint32_t SequenceID = 0) const;
422416

423-
/// Add info on kernels using local arg into cache
424-
void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img);
425-
426417
std::set<const RTDeviceBinaryImage *>
427418
collectDependentDeviceImagesForVirtualFunctions(
428419
const RTDeviceBinaryImage &Img, const device_impl &Dev);
@@ -529,12 +520,6 @@ class ProgramManager {
529520
bool m_UseSpvFile = false;
530521
RTDeviceBinaryImageUPtr m_SpvFileImage;
531522

532-
// std::less<> is a transparent comparator that enabled comparison between
533-
// different types without temporary key_type object creation. This includes
534-
// standard overloads, such as comparison between std::string and
535-
// std::string_view or just char*.
536-
std::unordered_map<std::string_view, int> m_KernelImplicitLocalArgPos;
537-
538523
// Map for storing device kernel information. Runtime lookup should be avoided
539524
// by caching the pointers when possible.
540525
std::unordered_map<std::string_view, DeviceKernelInfo> m_DeviceKernelInfoMap;

sycl/source/detail/scheduler/commands.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2448,7 +2448,7 @@ static ur_result_t SetKernelParamsAndLaunch(
24482448
applyFuncOnFilteredArgs(EliminatedArgMask, Args, setFunc);
24492449
}
24502450

2451-
std::optional<int> ImplicitLocalArg =
2451+
const std::optional<int> &ImplicitLocalArg =
24522452
DeviceKernelInfo.getImplicitLocalArgPos();
24532453
// Set the implicit local memory buffer to support
24542454
// get_work_group_scratch_memory. This is for backend not supporting

sycl/source/handler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,7 @@ detail::EventImplPtr handler::finalize() {
488488
// Fetch the device kernel info pointer if it hasn't been set (e.g.
489489
// in kernel bundle or free function cases).
490490
impl->MKernelData.setDeviceKernelInfoPtr(
491-
&detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
491+
&detail::ProgramManager::getInstance().getDeviceKernelInfo(
492492
std::string_view(MKernelName)));
493493
}
494494
assert(impl->MKernelData.getKernelName() == MKernelName);

sycl/test-e2e/Config/kernel_from_file.cpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
// REQUIRES: target-spir
22

3-
// FIXME Disabled fallback assert as it'll require either online linking or
4-
// explicit offline linking step here
53
// FIXME separate compilation requires -fno-sycl-dead-args-optimization
64
// As we are doing a separate device compilation here, we need to explicitly
75
// add the device lib instrumentation (itt_compiler_wrapper)
8-
// RUN: %clangxx -Wno-error=ignored-attributes -DSYCL_DISABLE_FALLBACK_ASSERT %cxx_std_optionc++17 -fsycl-device-only -fno-sycl-dead-args-optimization -Xclang -fsycl-int-header=%t.h %s -o %t.bc -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict
6+
// RUN: %clangxx -Wno-error=ignored-attributes -DUSED_KERNEL -fno-sycl-dead-args-optimization %cxx_std_optionc++17 -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -o %t.bc -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict
97
// >> ---- unbundle compiler wrapper and asan device objects
108
// RUN: clang-offload-bundler -type=o -targets=sycl-spir64-unknown-unknown -input=%sycl_static_libs_dir/libsycl-itt-compiler-wrappers%obj_ext -output=%t_compiler_wrappers.bc -unbundle
119
// RUN: %if linux %{ clang-offload-bundler -type=o -targets=sycl-spir64-unknown-unknown -input=%sycl_static_libs_dir/libsycl-asan%obj_ext -output=%t_asan.bc -unbundle %}
1210
// >> ---- link device code
1311
// RUN: %if linux %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %t_asan.bc %} %else %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %}
1412
// >> ---- translate to SPIR-V
1513
// RUN: llvm-spirv -o %t.spv %t_app.bc
16-
// RUN: %clangxx -Wno-error=ignored-attributes %sycl_include -DSYCL_DISABLE_FALLBACK_ASSERT %cxx_std_optionc++17 %include_option %t.h %s -o %t.out %sycl_options -Xclang -verify-ignore-unexpected=note,warning %if preview-mode %{-Wno-unused-command-line-argument%}
14+
// Need to perform full compilation here since the SYCL runtime uses image
15+
// properties from the multi-architecture binary.
16+
// RUN: %{build} -fno-sycl-dead-args-optimization -o %t.out
1717
// RUN: env SYCL_USE_KERNEL_SPV=%t.spv %{run} %t.out
1818

1919
#include <iostream>
@@ -31,10 +31,15 @@ int main(int argc, char **argv) {
3131
event e = myQueue.submit([&](handler &cgh) {
3232
auto ptr = buf.get_access<access::mode::read_write>(cgh);
3333

34-
cgh.single_task<class my_kernel>([=]() { ptr[0]++; });
34+
cgh.single_task<class my_kernel>([=]() {
35+
#ifdef USED_KERNEL
36+
ptr[0]++;
37+
#else
38+
ptr[0]--;
39+
#endif
40+
});
3541
});
3642
e.wait_and_throw();
37-
3843
} catch (sycl::exception const &e) {
3944
std::cerr << "SYCL exception caught:\n";
4045
std::cerr << e.what() << "\n";

0 commit comments

Comments
 (0)