Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unified external API #393

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions include/rpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ extern "C" SHARED_PUBLIC rppStatus_t rppCreate(rppHandle_t* handle);
* \retval rppStatusUnknownError
* \retval rppStatusUnsupportedOp
*/
extern "C" SHARED_PUBLIC rppStatus_t rppCreateWithBatchSize(rppHandle_t* handle, size_t nBatchSize, Rpp32u numThreads = 0, RppBackend backend = RPP_HOST_BACKEND);

/*! \brief Destroy RPP handle.
* \details Function to destroy a RPP handle. To be called in the end to break down the RPP environment.
Expand Down Expand Up @@ -257,7 +257,7 @@ extern "C" SHARED_PUBLIC rppStatus_t rppCreateWithStream(rppHandle_t* handle, rp
* \retval rppStatusUnknownError
* \retval rppStatusUnsupportedOp
*/
extern "C" SHARED_PUBLIC rppStatus_t rppCreateWithStreamAndBatchSize(rppHandle_t* handle, rppAcceleratorQueue_t stream, size_t nBatchSize, RppBackend backend = RPP_HIP_BACKEND);

/*! \brief Destroy RPP GPU handle.
* \details Function to destroy a RPP handle's device memory allocation. To be called in the end to break down the RPP environment.
Expand Down
6 changes: 6 additions & 0 deletions include/rppdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,12 @@ typedef enum
RPP_ERROR_INVALID_DST_DIMS = -24
} RppStatus;

/*! \brief RPP backend type enums
 * \details Selects the compute backend a handle is created for.
 * \ingroup group_rppdefs
 */
typedef enum
{
    RPP_HOST_BACKEND,    /*!< CPU (HOST) backend */
    RPP_HIP_BACKEND      /*!< HIP GPU backend */
} RppBackend;

/*! \brief RPP rppStatus_t type enums
* \ingroup group_rppdefs
*/
Expand Down
31 changes: 4 additions & 27 deletions include/rppt_tensor_color_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ extern "C" {
* @{
*/

/*! \brief Brightness augmentation on HOST backend for a NCHW/NHWC layout tensor
/*! \brief Brightness augmentation on HOST/HIP backend for a NCHW/NHWC layout tensor
* \details The brightness augmentation changes brightness of a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
Expand All @@ -52,39 +52,16 @@ extern "C" {
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HOST memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
* \param [in] alphaTensor alpha values for brightness calculation (1D tensor in HOST memory, of size batchSize, with 0 <= alpha <= 20 for each image in batch)
* \param [in] betaTensor beta values for brightness calculation (1D tensor in HOST memory, of size batchSize, with 0 <= beta <= 255 for each image in batch)
* \param [in] alphaTensor alpha values for brightness calculation (1D tensor in HOST/pinned memory, of size batchSize, with 0 <= alpha <= 20 for each image in batch)
* \param [in] betaTensor beta values for brightness calculation (1D tensor in HOST/pinned memory, of size batchSize, with 0 <= beta <= 255 for each image in batch)
* \param [in] roiTensorPtrSrc ROI data in HOST/HIP memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
* \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
* \param [in] rppHandle RPP HOST/HIP handle created with <tt>\ref rppCreateWithBatchSize()</tt> / <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
RppStatus rppt_brightness_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *alphaTensor, Rpp32f *betaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);

#ifdef GPU_SUPPORT
/*! \brief Brightness augmentation on HIP backend for a NCHW/NHWC layout tensor
* \details The brightness augmentation changes brightness of a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.png Sample Input
* \image html color_augmentations_brightness_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
* \param [in] alphaTensor alpha values for brightness calculation (1D tensor in pinned/HOST memory, of size batchSize, with 0 <= alpha <= 20 for each image in batch)
* \param [in] betaTensor beta values for brightness calculation (1D tensor in pinned/HOST memory, of size batchSize, with 0 <= beta <= 255 for each image in batch)
* \param [in] roiTensorPtrSrc ROI data in HIP memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
* \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
* \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
RppStatus rppt_brightness_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *alphaTensor, Rpp32f *betaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
#endif // GPU_SUPPORT
RppStatus rppt_brightness(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *alphaTensor, Rpp32f *betaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);

/*! \brief Gamma correction augmentation on HOST backend for a NCHW/NHWC layout tensor
* \details The gamma correction augmentation does a non-linear gamma correction of a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
Expand Down
32 changes: 5 additions & 27 deletions include/rppt_tensor_effects_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -428,42 +428,20 @@ RppStatus rppt_ricap_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPt
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.png Sample Input
* \image html effects_augmentations_vignette_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcPtr source tensor in HIP/HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HOST memory
* \param [out] dstPtr destination tensor in HIP/HOST memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
* \param[in] vignetteIntensityTensor intensity values to quantify vignette effect (1D tensor of size batchSize with 0 < vignetteIntensityTensor[n] for each image in batch)
* \param [in] roiTensorPtrSrc ROI data in HOST memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
* \param [in] roiTensorPtrSrc ROI data in HIP/HOST memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
* \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
* \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
* \param [in] rppHandle RPP HIP/HOST handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt> / <tt>\ref rppCreateWithBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
// NOTE: Pixel mismatch of 5% is expected between HIP and HOST Tensor variations due to usage of fastexpavx() instead of exp() in HOST Tensor.
RppStatus rppt_vignette_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *vignetteIntensityTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);

#ifdef GPU_SUPPORT
/*! \brief Vignette augmentation on HIP backend for a NCHW/NHWC layout tensor
* \details The vignette augmentation adds a vignette effect for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.png Sample Input
* \image html effects_augmentations_vignette_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
* \param[in] vignetteIntensityTensor intensity values to quantify vignette effect (1D tensor of size batchSize with 0 < vignetteIntensityTensor[n] for each image in batch)
* \param [in] roiTensorPtrSrc ROI data in HIP memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
* \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
* \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
RppStatus rppt_vignette_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *vignetteIntensityTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
#endif // GPU_SUPPORT
RppStatus rppt_vignette(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *vignetteIntensityTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);

/*! \brief Jitter augmentation on HOST backend for a NCHW/NHWC layout tensor
* \details The jitter augmentation adds a jitter effect for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
Expand Down
6 changes: 5 additions & 1 deletion src/include/common/rpp/handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,14 @@ struct Handle : rppHandle
{
Handle();
Handle(size_t nBatchSize, Rpp32u numThreads = 0);
Handle(size_t nBatchSize, Rpp32u numThreads = 0, RppBackend backend = RPP_HIP_BACKEND);
Handle(Handle&&) noexcept;
~Handle();

InitHandle* GetInitHandle() const;
size_t GetBatchSize() const;
Rpp32u GetNumThreads() const;
RppBackend GetBackend() const;
void SetBatchSize(size_t bSize) const;
void rpp_destroy_object_host();
std::unique_ptr<HandleImpl> impl;
Expand All @@ -81,11 +83,13 @@ struct Handle : rppHandle
// Host handle related
Handle();
Handle(size_t nBatchSize, Rpp32u numThreads = 0);
Handle(size_t nBatchSize, Rpp32u numThreads = 0, RppBackend backend = RPP_HOST_BACKEND);
Handle(Handle&&) noexcept;
~Handle();
InitHandle* GetInitHandle() const;
size_t GetBatchSize() const;
Rpp32u GetNumThreads() const;
RppBackend GetBackend() const;
void SetBatchSize(size_t bSize) const;
void rpp_destroy_object_host();

Expand All @@ -94,7 +98,7 @@ struct Handle : rppHandle

// Device handle related
Handle(rppAcceleratorQueue_t stream);
Handle(rppAcceleratorQueue_t stream, size_t nBatchSize);
Handle(rppAcceleratorQueue_t stream, size_t nBatchSize, RppBackend backend);
void rpp_destroy_object_gpu();
rppAcceleratorQueue_t GetStream() const;
void SetStream(rppAcceleratorQueue_t streamID) const;
Expand Down
8 changes: 4 additions & 4 deletions src/modules/handle_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ extern "C" rppStatus_t rppCreate(rppHandle_t* handle)
return rpp::try_([&] { rpp::deref(handle) = new rpp::Handle(); });
}

/*! \brief Creates an RPP handle for a given batch size, thread count and backend.
 * \details Thin C ABI wrapper over the C++ rpp::Handle constructor.
 * \param [out] handle      location receiving the newly created handle
 * \param [in]  nBatchSize  number of images processed per call
 * \param [in]  numThreads  worker thread count (0 selects a default inside Handle)
 * \param [in]  backend     backend the handle targets (RPP_HOST_BACKEND / RPP_HIP_BACKEND)
 * \return rppStatusSuccess on success, an error status otherwise.
 */
extern "C" rppStatus_t rppCreateWithBatchSize(rppHandle_t* handle, size_t nBatchSize, Rpp32u numThreads, RppBackend backend)
{
    // rpp::try_ converts any exception thrown during construction into an
    // rppStatus_t error code, keeping the C ABI exception-free.
    return rpp::try_([&] { rpp::deref(handle) = new rpp::Handle(nBatchSize, numThreads, backend); });
}

extern "C" rppStatus_t rppDestroy(rppHandle_t handle)
Expand Down Expand Up @@ -88,9 +88,9 @@ extern "C" rppStatus_t rppCreateWithStream(rppHandle_t* handle, rppAcceleratorQu
return rpp::try_([&] { rpp::deref(handle) = new rpp::Handle(stream); });
}

/*! \brief Creates an RPP handle bound to an accelerator stream for a given batch size and backend.
 * \details Thin C ABI wrapper over the stream-taking C++ rpp::Handle constructor.
 * \param [out] handle      location receiving the newly created handle
 * \param [in]  stream      accelerator queue/stream the handle submits work to
 * \param [in]  nBatchSize  number of images processed per call
 * \param [in]  backend     backend the handle targets (RPP_HOST_BACKEND / RPP_HIP_BACKEND)
 * \return rppStatusSuccess on success, an error status otherwise.
 */
extern "C" rppStatus_t rppCreateWithStreamAndBatchSize(rppHandle_t* handle, rppAcceleratorQueue_t stream, size_t nBatchSize, RppBackend backend)
{
    // rpp::try_ converts any exception thrown during construction into an
    // rppStatus_t error code, keeping the C ABI exception-free.
    return rpp::try_([&] { rpp::deref(handle) = new rpp::Handle(stream, nBatchSize, backend); });
}

extern "C" rppStatus_t rppDestroyGPU(rppHandle_t handle)
Expand Down
9 changes: 8 additions & 1 deletion src/modules/handlehost.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ struct HandleImpl
{
size_t nBatchSize = 1;
Rpp32u numThreads = 0;
RppBackend backend = RPP_HOST_BACKEND;
InitHandle* initHandle = nullptr;

void PreInitializeBufferCPU()
Expand All @@ -51,13 +52,14 @@ struct HandleImpl
}
};

/*! \brief Constructs a HOST-side Handle.
 * \param batchSize  number of images processed per call
 * \param numThreads requested worker thread count; clamped to hardware concurrency,
 *                   0 selects one thread per batch element
 * \param backend    backend this handle targets (stored for later queries via GetBackend())
 */
Handle::Handle(size_t batchSize, Rpp32u numThreads, RppBackend backend) : impl(new HandleImpl())
{
    impl->nBatchSize = batchSize;
    // Never launch more threads than the hardware supports.
    numThreads = std::min(numThreads, std::thread::hardware_concurrency());
    // 0 means "one thread per batch element".
    // NOTE(review): this fallback is NOT clamped to hardware_concurrency —
    // confirm that batch sizes larger than the core count are intended to
    // oversubscribe the machine.
    if(numThreads == 0)
        numThreads = batchSize;
    impl->numThreads = numThreads;
    impl->backend = backend;
    impl->PreInitializeBufferCPU();
}

Expand Down Expand Up @@ -90,6 +92,11 @@ Rpp32u Handle::GetNumThreads() const
return this->impl->numThreads;
}

/*! \brief Returns the backend this handle was created for. */
RppBackend Handle::GetBackend() const
{
    return impl->backend;
}

void Handle::SetBatchSize(size_t bSize) const
{
this->impl->nBatchSize = bSize;
Expand Down
12 changes: 10 additions & 2 deletions src/modules/hip/handlehip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ struct HandleImpl
float profiling_result = 0.0;
size_t nBatchSize = 1;
Rpp32u numThreads = 0;
RppBackend backend = RppBackend::RPP_HIP_BACKEND;
InitHandle* initHandle = nullptr;

HandleImpl() : ctx(get_ctx()) {}
Expand Down Expand Up @@ -261,9 +262,10 @@ struct HandleImpl
}
};

Handle::Handle(rppAcceleratorQueue_t stream, size_t batchSize) : impl(new HandleImpl())
Handle::Handle(rppAcceleratorQueue_t stream, size_t batchSize, RppBackend backend) : impl(new HandleImpl())
{
impl->nBatchSize = batchSize;
impl->backend = backend;
this->impl->device = get_device_id();
this->impl->ctx = get_ctx();

Expand Down Expand Up @@ -292,13 +294,14 @@ Handle::Handle(rppAcceleratorQueue_t stream) : impl(new HandleImpl())
RPP_LOG_I(*this);
}

/*! \brief Constructs a Handle without a stream (HOST-style construction in the HIP build).
 * \param batchSize  number of images processed per call
 * \param numThreads requested worker thread count; clamped to hardware concurrency,
 *                   0 selects one thread per batch element
 * \param backend    backend this handle targets (stored for later queries via GetBackend())
 */
Handle::Handle(size_t batchSize, Rpp32u numThreads, RppBackend backend) : impl(new HandleImpl())
{
    impl->nBatchSize = batchSize;
    // Never launch more threads than the hardware supports.
    numThreads = std::min(numThreads, std::thread::hardware_concurrency());
    // 0 means "one thread per batch element".
    // NOTE(review): this fallback is NOT clamped to hardware_concurrency —
    // confirm oversubscription is intended for large batch sizes.
    if(numThreads == 0)
        numThreads = batchSize;
    impl->numThreads = numThreads;
    impl->backend = backend;
    // No custom allocator callbacks: use the default allocation behavior.
    this->SetAllocator(nullptr, nullptr, nullptr);
    impl->PreInitializeBufferCPU();
}
Expand Down Expand Up @@ -413,6 +416,11 @@ Rpp32u Handle::GetNumThreads() const
return this->impl->numThreads;
}

/*! \brief Returns the backend this handle was created for. */
RppBackend Handle::GetBackend() const
{
    return impl->backend;
}

void Handle::SetBatchSize(size_t bSize) const
{
this->impl->nBatchSize = bSize;
Expand Down
Loading