Skip to content

Commit 928a919

Browse files
[SYCL][L0] Gracefully handle the case that L0 was already unloaded when we do cleanup (#8948)
Signed-off-by: Sergey V Maslov <[email protected]>
1 parent 29536ed commit 928a919

File tree

4 files changed

+84
-25
lines changed

4 files changed

+84
-25
lines changed

sycl/include/sycl/detail/pi_error.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ _PI_ERRC(PI_ERROR_INVALID_VA_API_MEDIA_ADAPTER_INTEL, -1098)
9797
_PI_ERRC(PI_ERROR_INVALID_VA_API_MEDIA_SURFACE_INTEL, -1099)
9898
_PI_ERRC(PI_ERROR_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL, -1100)
9999
_PI_ERRC(PI_ERROR_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL, -1101)
100+
// backend is lost, e.g. it was already unloaded
101+
_PI_ERRC(PI_ERROR_UNINITIALIZED, -1102)
100102

101103
// PI specific error codes
102104
// PI_ERROR_PLUGIN_SPECIFIC_ERROR indicates that a backend-specific error or

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 79 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -632,7 +632,11 @@ pi_result _pi_context::finalize() {
632632
std::scoped_lock<pi_mutex> Lock(EventCacheMutex);
633633
for (auto &EventCache : EventCaches) {
634634
for (auto &Event : EventCache) {
635-
ZE_CALL(zeEventDestroy, (Event->ZeEvent));
635+
auto ZeResult = ZE_CALL_NOCHECK(zeEventDestroy, (Event->ZeEvent));
636+
// Gracefully handle the case that L0 was already unloaded.
637+
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
638+
return mapError(ZeResult);
639+
636640
delete Event;
637641
}
638642
EventCache.clear();
@@ -641,26 +645,41 @@ pi_result _pi_context::finalize() {
641645
{
642646
std::scoped_lock<pi_mutex> Lock(ZeEventPoolCacheMutex);
643647
for (auto &ZePoolCache : ZeEventPoolCache) {
644-
for (auto &ZePool : ZePoolCache)
645-
ZE_CALL(zeEventPoolDestroy, (ZePool));
648+
for (auto &ZePool : ZePoolCache) {
649+
auto ZeResult = ZE_CALL_NOCHECK(zeEventPoolDestroy, (ZePool));
650+
// Gracefully handle the case that L0 was already unloaded.
651+
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
652+
return mapError(ZeResult);
653+
}
646654
ZePoolCache.clear();
647655
}
648656
}
649657

650658
// Destroy the command list used for initializations
651-
ZE_CALL(zeCommandListDestroy, (ZeCommandListInit));
659+
auto ZeResult = ZE_CALL_NOCHECK(zeCommandListDestroy, (ZeCommandListInit));
660+
// Gracefully handle the case that L0 was already unloaded.
661+
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
662+
return mapError(ZeResult);
652663

653664
std::scoped_lock<pi_mutex> Lock(ZeCommandListCacheMutex);
654665
for (auto &List : ZeComputeCommandListCache) {
655666
for (ze_command_list_handle_t &ZeCommandList : List.second) {
656-
if (ZeCommandList)
657-
ZE_CALL(zeCommandListDestroy, (ZeCommandList));
667+
if (ZeCommandList) {
668+
auto ZeResult = ZE_CALL_NOCHECK(zeCommandListDestroy, (ZeCommandList));
669+
// Gracefully handle the case that L0 was already unloaded.
670+
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
671+
return mapError(ZeResult);
672+
}
658673
}
659674
}
660675
for (auto &List : ZeCopyCommandListCache) {
661676
for (ze_command_list_handle_t &ZeCommandList : List.second) {
662-
if (ZeCommandList)
663-
ZE_CALL(zeCommandListDestroy, (ZeCommandList));
677+
if (ZeCommandList) {
678+
auto ZeResult = ZE_CALL_NOCHECK(zeCommandListDestroy, (ZeCommandList));
679+
// Gracefully handle the case that L0 was already unloaded.
680+
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
681+
return mapError(ZeResult);
682+
}
664683
}
665684
}
666685
return PI_SUCCESS;
@@ -2423,9 +2442,12 @@ pi_result ContextReleaseHelper(pi_context Context) {
24232442
// and therefore it must be valid at that point.
24242443
// Technically it should be placed to the destructor of pi_context
24252444
// but this makes API error handling more complex.
2426-
if (DestoryZeContext)
2427-
ZE_CALL(zeContextDestroy, (DestoryZeContext));
2428-
2445+
if (DestoryZeContext) {
2446+
auto ZeResult = ZE_CALL_NOCHECK(zeContextDestroy, (DestoryZeContext));
2447+
// Gracefully handle the case that L0 was already unloaded.
2448+
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
2449+
return mapError(ZeResult);
2450+
}
24292451
return Result;
24302452
}
24312453

@@ -2707,8 +2729,12 @@ pi_result piQueueRelease(pi_queue Queue) {
27072729
// runtime. Destroy only if a queue is healthy. Destroying a fence may
27082730
// cause a hang otherwise.
27092731
// If the fence is a nullptr we are using immediate commandlists.
2710-
if (Queue->Healthy && it->second.ZeFence != nullptr)
2711-
ZE_CALL(zeFenceDestroy, (it->second.ZeFence));
2732+
if (Queue->Healthy && it->second.ZeFence != nullptr) {
2733+
auto ZeResult = ZE_CALL_NOCHECK(zeFenceDestroy, (it->second.ZeFence));
2734+
// Gracefully handle the case that L0 was already unloaded.
2735+
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
2736+
return mapError(ZeResult);
2737+
}
27122738
}
27132739
Queue->CommandListMap.clear();
27142740
}
@@ -2744,8 +2770,12 @@ static pi_result piQueueReleaseInternal(pi_queue Queue) {
27442770
{Queue->ComputeQueueGroupsByTID, Queue->CopyQueueGroupsByTID})
27452771
for (auto &QueueGroup : QueueMap)
27462772
for (auto &ZeQueue : QueueGroup.second.ZeQueues)
2747-
if (ZeQueue)
2748-
ZE_CALL(zeCommandQueueDestroy, (ZeQueue));
2773+
if (ZeQueue) {
2774+
auto ZeResult = ZE_CALL_NOCHECK(zeCommandQueueDestroy, (ZeQueue));
2775+
// Gracefully handle the case that L0 was already unloaded.
2776+
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
2777+
return mapError(ZeResult);
2778+
}
27492779
}
27502780

27512781
urPrint("piQueueRelease(compute) NumTimesClosedFull %d, "
@@ -3115,7 +3145,11 @@ pi_result piMemRelease(pi_mem Mem) {
31153145
if (Mem->isImage()) {
31163146
char *ZeHandleImage;
31173147
PI_CALL(Mem->getZeHandle(ZeHandleImage, _pi_mem::write_only));
3118-
ZE_CALL(zeImageDestroy, (pi_cast<ze_image_handle_t>(ZeHandleImage)));
3148+
auto ZeResult = ZE_CALL_NOCHECK(
3149+
zeImageDestroy, (pi_cast<ze_image_handle_t>(ZeHandleImage)));
3150+
// Gracefully handle the case that L0 was already unloaded.
3151+
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
3152+
return mapError(ZeResult);
31193153
} else {
31203154
auto Buffer = static_cast<pi_buffer>(Mem);
31213155
Buffer->free();
@@ -4326,8 +4360,12 @@ pi_result piKernelRelease(pi_kernel Kernel) {
43264360
return PI_SUCCESS;
43274361

43284362
auto KernelProgram = Kernel->Program;
4329-
if (Kernel->OwnZeKernel)
4330-
ZE_CALL(zeKernelDestroy, (Kernel->ZeKernel));
4363+
if (Kernel->OwnZeKernel) {
4364+
auto ZeResult = ZE_CALL_NOCHECK(zeKernelDestroy, (Kernel->ZeKernel));
4365+
// Gracefully handle the case that L0 was already unloaded.
4366+
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
4367+
return mapError(ZeResult);
4368+
}
43314369
if (IndirectAccessTrackingEnabled) {
43324370
PI_CALL(piContextRelease(KernelProgram->Context));
43334371
}
@@ -5148,7 +5186,11 @@ static pi_result piEventReleaseInternal(pi_event Event) {
51485186
}
51495187
if (Event->OwnZeEvent) {
51505188
if (DisableEventsCaching) {
5151-
ZE_CALL(zeEventDestroy, (Event->ZeEvent));
5189+
auto ZeResult = ZE_CALL_NOCHECK(zeEventDestroy, (Event->ZeEvent));
5190+
// Gracefully handle the case that L0 was already unloaded.
5191+
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
5192+
return mapError(ZeResult);
5193+
51525194
auto Context = Event->Context;
51535195
if (auto Res = Context->decrementUnreleasedEventsInPool(Event))
51545196
return Res;
@@ -5396,9 +5438,12 @@ pi_result piSamplerRelease(pi_sampler Sampler) {
53965438
if (!Sampler->RefCount.decrementAndTest())
53975439
return PI_SUCCESS;
53985440

5399-
ZE_CALL(zeSamplerDestroy, (Sampler->ZeSampler));
5400-
delete Sampler;
5441+
auto ZeResult = ZE_CALL_NOCHECK(zeSamplerDestroy, (Sampler->ZeSampler));
5442+
// Gracefully handle the case that L0 was already unloaded.
5443+
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
5444+
return mapError(ZeResult);
54015445

5446+
delete Sampler;
54025447
return PI_SUCCESS;
54035448
}
54045449

@@ -7393,9 +7438,19 @@ static pi_result USMFreeHelper(pi_context Context, void *Ptr,
73937438

73947439
// Query memory type of the pointer we're freeing to determine the correct
73957440
// way to do it (directly or via an allocator)
7396-
ZE_CALL(zeMemGetAllocProperties,
7397-
(Context->ZeContext, Ptr, &ZeMemoryAllocationProperties,
7398-
&ZeDeviceHandle));
7441+
auto ZeResult =
7442+
ZE_CALL_NOCHECK(zeMemGetAllocProperties,
7443+
(Context->ZeContext, Ptr, &ZeMemoryAllocationProperties,
7444+
&ZeDeviceHandle));
7445+
7446+
// Handle the case that L0 RT was already unloaded
7447+
if (ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) {
7448+
if (IndirectAccessTrackingEnabled)
7449+
PI_CALL(ContextReleaseHelper(Context));
7450+
return PI_SUCCESS;
7451+
} else if (ZeResult) {
7452+
return mapError(ZeResult);
7453+
}
73997454

74007455
// If memory type is host release from host pool
74017456
if (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_HOST) {

sycl/plugins/unified_runtime/pi2ur.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ static pi_result ur2piResult(ur_result_t urResult) {
4545
return PI_ERROR_OUT_OF_HOST_MEMORY;
4646
case UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE:
4747
return PI_ERROR_BUILD_PROGRAM_FAILURE;
48+
case UR_RESULT_ERROR_UNINITIALIZED:
49+
return PI_ERROR_UNINITIALIZED;
4850
default:
4951
return PI_ERROR_UNKNOWN;
5052
};

sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ ur_result_t ze2urResult(ze_result_t ZeResult) {
2020
case ZE_RESULT_ERROR_NOT_AVAILABLE:
2121
return UR_RESULT_ERROR_INVALID_OPERATION;
2222
case ZE_RESULT_ERROR_UNINITIALIZED:
23-
return UR_RESULT_ERROR_INVALID_PLATFORM;
23+
return UR_RESULT_ERROR_UNINITIALIZED;
2424
case ZE_RESULT_ERROR_INVALID_ARGUMENT:
2525
return UR_RESULT_ERROR_INVALID_ARGUMENT;
2626
case ZE_RESULT_ERROR_INVALID_NULL_POINTER:

0 commit comments

Comments
 (0)