[PTI-SDK] Update dlworkloads, fix libpti_view segv (#72)

mschilling0 · maaswani · web-flow · commit f23ec0e41035 · 2024-02-20T12:43:32.000-05:00
* Update dlworkloads with fix for segv on onednn >= 2024.0.0
* Fix libpti_view segv when the collector is not able to initialize.
  This usually occurs when the graphics drivers are not installed,
  or there is no GPU in the system.

Signed-off-by: Schilling, Matthew <matthew.schilling@intel.com>
Co-authored-by: Aswani, Mahesh <mahesh.aswani@intel.com>
diff --git a/sdk/VERSION b/sdk/VERSION
@@ -1 +1 @@
-0.3.4
+0.3.5
diff --git a/sdk/samples/dlworkloads/main.cpp b/sdk/samples/dlworkloads/main.cpp
@@ -38,9 +38,6 @@ void PrintUsage()
   std::cout << std::endl;
   std::cout << "It is supposed that this application will be updated frequently, so this might be not the latest one." << std::endl;
   std::cout << std::endl;
-#if __LIBSYCL_MAJOR_VERSION >= 7
-  std::cerr << "Notice: A portion of this sample was not build. To build the whole sample, revert to older oneAPI release (<= 2023.2.0)" << std::endl;
-#endif
 }
 
 void run(sycl::queue *q)
diff --git a/sdk/samples/dlworkloads/model_mixedprogramming.cpp b/sdk/samples/dlworkloads/model_mixedprogramming.cpp
@@ -15,15 +15,9 @@ TinyTensor run_model_mixedprogramming(TinyTensor inp, sycl::queue *q)
   TinyTensor outp = run_syclkernel_operation_scaledown(inp, q);
   GlobalDeviceMemoryManager().free(inp.data);
 
-  // TODO(matthew.schilling@intel.com): Fails when run with XPTI tracing. We
-  // need to figure out a way to uncomment this. It crashes PTI-SDK and
-  // Unitrace built with OneAPI/ICPX >= 2024.0.0 .
-  // the next operation uses oneDNN for conv2d
-#if __LIBSYCL_MAJOR_VERSION < 7
   inp = outp;
   outp = run_onednn_operation_conv2d(inp, q);
   GlobalDeviceMemoryManager().free(inp.data);
-#endif
 
   // next operation uses oneMKL
   inp = outp;
diff --git a/sdk/samples/dlworkloads/operation_onednn.cpp b/sdk/samples/dlworkloads/operation_onednn.cpp
@@ -8,14 +8,15 @@
 
 #include "operation_onednn.h"
 #include "utils.h"
+#include "device_memory.h"
 
 // code as simple as possible for the demo
 namespace {
 inline auto& Conv2dWeightsInstance() {
     static TinyTensor conv2d_weights(0, 0, 0, 0);
     return conv2d_weights;
 }
-} // namespace 
+} // namespace
 
 void onednn_prepare_weights(int oc, int ic, int ks, sycl::queue *q)
 {
@@ -84,6 +85,7 @@ TinyTensor run_onednn_operation_conv2d(const TinyTensor& inp, sycl::queue *q)
     );
 
     dnnl::primitive_attr pattr;
+    pattr.set_scratchpad_mode(dnnl::scratchpad_mode::user);
     auto conv_pd = dnnl::convolution_forward::primitive_desc(
                 eng,
                 dnnl::prop_kind::forward_inference,
@@ -119,21 +121,18 @@ TinyTensor run_onednn_operation_conv2d(const TinyTensor& inp, sycl::queue *q)
     assert(conv_pd.dst_desc() == dst_mem.get_desc());
     assert(conv_pd.weights_desc() == weights_mem.get_desc());
 
-    int scratchpad_size = conv_pd.scratchpad_desc().get_size();
-    static bool warning_shown = false;
-    if (scratchpad_size == 0) {
-        if (!warning_shown) {
-            warning_shown = true;
-            // std::cout << __FILE__ << ":" << __LINE__;
-            // std::cout << " we need a onednn case that scratchpad_size > 0, to verify if it can be allocated within onednn for sycl grapch capture mode" << std::endl;
-        }
-    }
+    dnnl::memory::desc scratchpad_md = conv_pd.scratchpad_desc();
+    auto scratchpad_size = scratchpad_md.get_size();
+    auto* scratchpad_ptr = GlobalDeviceMemoryManager().alloc(scratchpad_size/sizeof(float)+1);
+    dnnl::memory scratchpad(scratchpad_md, eng, scratchpad_ptr);
 
     auto conv = dnnl::convolution_forward(conv_pd);
     conv.execute(s,
                 {{DNNL_ARG_SRC, src_mem},
                  {DNNL_ARG_WEIGHTS, weights_mem},
-                 {DNNL_ARG_DST, dst_mem}});
+                 {DNNL_ARG_DST, dst_mem},
+                 {DNNL_ARG_SCRATCHPAD, scratchpad}});
 
+    GlobalDeviceMemoryManager().free(scratchpad_ptr);
     return outp;
 }
diff --git a/sdk/src/view_handler.h b/sdk/src/view_handler.h
@@ -146,8 +146,10 @@ struct PtiViewRecordHandler {
   virtual ~PtiViewRecordHandler() {
     overhead::overhead_collection_enabled = false;
     DisableTracing();
-    collector_->DisableTracing();
-    delete collector_;
+    if (collector_) {
+      collector_->DisableTracing();
+      delete collector_;
+    }
     stop_consumer_thread_ = true;
     buffer_queue_.ResetBufferDepth();
     buffer_queue_.Push(ViewBuffer{});  // Stop consumer

Original file line number	Diff line number	Diff line change
`@@ -38,9 +38,6 @@ void PrintUsage()`
`38`	`38`	`std::cout << std::endl;`
`39`	`39`	`std::cout << "It is supposed that this application will be updated frequently, so this might be not the latest one." << std::endl;`
`40`	`40`	`std::cout << std::endl;`
`41`		`-#if __LIBSYCL_MAJOR_VERSION >= 7`
`42`		`- std::cerr << "Notice: A portion of this sample was not build. To build the whole sample, revert to older oneAPI release (<= 2023.2.0)" << std::endl;`
`43`		`-#endif`
`44`	`41`	`}`
`45`	`42`
`46`	`43`	`void run(sycl::queue *q)`