@@ -889,32 +889,7 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
889889 LastEvent->wait ();
890890 }
891891 } else if (!isInOrder ()) {
892- std::vector<std::weak_ptr<event_impl>> WeakEvents;
893- {
894- std::lock_guard<std::mutex> Lock (MMutex);
895- WeakEvents.swap (MEventsWeak);
896- MMissedCleanupRequests.unset (
897- [&](MissedCleanupRequestsType &MissedCleanupRequests) {
898- for (auto &UpdatedGraph : MissedCleanupRequests)
899- doUnenqueuedCommandCleanup (UpdatedGraph);
900- MissedCleanupRequests.clear ();
901- });
902- }
903-
904- // Wait for unenqueued or host task events, starting
905- // from the latest submitted task in order to minimize total amount of
906- // calls, then handle the rest with urQueueFinish.
907- for (auto EventImplWeakPtrIt = WeakEvents.rbegin ();
908- EventImplWeakPtrIt != WeakEvents.rend (); ++EventImplWeakPtrIt) {
909- if (std::shared_ptr<event_impl> EventImplSharedPtr =
910- EventImplWeakPtrIt->lock ()) {
911- // A nullptr UR event indicates that urQueueFinish will not cover it,
912- // either because it's a host task event or an unenqueued one.
913- if (nullptr == EventImplSharedPtr->getHandle ()) {
914- EventImplSharedPtr->wait ();
915- }
916- }
917- }
892+ waitForRuntimeLevelCmdsAndClear ();
918893 }
919894
920895 getAdapter ().call <UrApiKind::urQueueFinish>(getHandleRef ());
@@ -1127,6 +1102,47 @@ void queue_impl::verifyProps(const property_list &Props) const {
11271102 CheckPropertiesWithData);
11281103}
11291104
// Waits for "runtime-level" commands -- events that have no backend (UR)
// handle, i.e. host-task events or commands not yet enqueued -- which a
// subsequent urQueueFinish call cannot cover. Called from queue_impl::wait()
// immediately before urQueueFinish.
//   - In-order queues: waits on the last recorded event if it has no UR
//     handle; the event is intentionally NOT cleared. Skipped when the queue
//     is in "no last event" mode (NOTE(review): exact semantics of
//     MNoLastEventMode are not visible here -- confirm against the header).
//   - Out-of-order queues: takes ownership of the recorded weak-event list
//     (clearing MEventsWeak), drains any missed unenqueued-command cleanup
//     requests, then waits on every still-alive event lacking a UR handle.
1105+ void queue_impl::waitForRuntimeLevelCmdsAndClear () {
1106+ if (isInOrder () && !MNoLastEventMode.load (std::memory_order_relaxed)) {
1107+ // if MLastEvent is not null and has no associated handle, we need to wait
1108+ // for it. We do not clear it however.
1109+ EventImplPtr LastEvent;
1110+ {
// Copy the shared_ptr under the lock, but wait outside of it so MMutex is
// not held while blocking on the event.
1111+ std::lock_guard<std::mutex> Lock (MMutex);
1112+ LastEvent = MDefaultGraphDeps.LastEventPtr ;
1113+ }
// A null UR handle means urQueueFinish cannot observe this event (host
// task or unenqueued command), so it must be waited on explicitly.
1114+ if (LastEvent && nullptr == LastEvent->getHandle ())
1115+ LastEvent->wait ();
1116+ } else if (!isInOrder ()) {
1117+ std::vector<std::weak_ptr<event_impl>> WeakEvents;
1118+ {
1119+ std::lock_guard<std::mutex> Lock (MMutex);
// Swap (rather than copy) so MEventsWeak is cleared atomically under the
// lock and the waiting below happens without MMutex held.
1120+ WeakEvents.swap (MEventsWeak);
// Perform any unenqueued-command cleanup that was deferred earlier while
// we still hold the lock that protects the request list.
1121+ MMissedCleanupRequests.unset (
1122+ [&](MissedCleanupRequestsType &MissedCleanupRequests) {
1123+ for (auto &UpdatedGraph : MissedCleanupRequests)
1124+ doUnenqueuedCommandCleanup (UpdatedGraph);
1125+ MissedCleanupRequests.clear ();
1126+ });
1127+ }
1128+
1129+ // Wait for unenqueued or host task events, starting
1130+ // from the latest submitted task in order to minimize total amount of
1131+ // calls, then handle the rest with urQueueFinish.
1132+ for (auto EventImplWeakPtrIt = WeakEvents.rbegin ();
1133+ EventImplWeakPtrIt != WeakEvents.rend (); ++EventImplWeakPtrIt) {
// lock() yields nullptr for events that were already destroyed; those
// need no wait.
1134+ if (std::shared_ptr<event_impl> EventImplSharedPtr =
1135+ EventImplWeakPtrIt->lock ()) {
1136+ // A nullptr UR event indicates that urQueueFinish will not cover it,
1137+ // either because it's a host task event or an unenqueued one.
1138+ if (nullptr == EventImplSharedPtr->getHandle ()) {
1139+ EventImplSharedPtr->wait ();
1140+ }
1141+ }
1142+ }
1143+ }
1144+ }
1145+
11301146} // namespace detail
11311147} // namespace _V1
11321148} // namespace sycl
0 commit comments