Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unified external API #393

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions include/rpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ extern "C" SHARED_PUBLIC rppStatus_t rppCreate(rppHandle_t* handle);
* \retval rppStatusUnknownError
* \retval rppStatusUnsupportedOp
*/
extern "C" SHARED_PUBLIC rppStatus_t rppCreateWithBatchSize(rppHandle_t* handle, size_t nBatchSize, Rpp32u numThreads = 0, RppBackend backend = RPP_HOST_BACKEND);

/*! \brief Destroy RPP handle.
* \details Function to destroy a RPP handle. To be called in the end to break down the RPP environment.
Expand Down Expand Up @@ -257,7 +257,7 @@ extern "C" SHARED_PUBLIC rppStatus_t rppCreateWithStream(rppHandle_t* handle, rp
* \retval rppStatusUnknownError
* \retval rppStatusUnsupportedOp
*/
extern "C" SHARED_PUBLIC rppStatus_t rppCreateWithStreamAndBatchSize(rppHandle_t* handle, rppAcceleratorQueue_t stream, size_t nBatchSize, RppBackend backend = RPP_HIP_BACKEND);

/*! \brief Destroy RPP GPU handle.
* \details Function to destroy a RPP handle's device memory allocation. To be called in the end to break down the RPP environment.
Expand Down
6 changes: 6 additions & 0 deletions include/rppdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,12 @@ typedef enum
RPP_ERROR_INVALID_DST_DIMS = -24
} RppStatus;

/*! \brief RPP backend type enums
 * \details Selects the compute backend a handle is created for.
 * \ingroup group_rppdefs
 */
typedef enum
{
    RPP_HOST_BACKEND,    /*!< CPU (HOST) backend */
    RPP_HIP_BACKEND      /*!< HIP GPU backend */
} RppBackend;

/*! \brief RPP rppStatus_t type enums
* \ingroup group_rppdefs
*/
Expand Down
31 changes: 4 additions & 27 deletions include/rppt_tensor_color_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ extern "C" {
* @{
*/

/*! \brief Brightness augmentation on HOST backend for a NCHW/NHWC layout tensor
/*! \brief Brightness augmentation on HOST/HIP backend for a NCHW/NHWC layout tensor
* \details The brightness augmentation changes brightness of a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
Expand All @@ -52,39 +52,16 @@ extern "C" {
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HOST memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
* \param [in] alphaTensor alpha values for brightness calculation (1D tensor in HOST memory, of size batchSize, with 0 <= alpha <= 20 for each image in batch)
* \param [in] betaTensor beta values for brightness calculation (1D tensor in HOST memory, of size batchSize, with 0 <= beta <= 255 for each image in batch)
* \param [in] alphaTensor alpha values for brightness calculation (1D tensor in HOST/pinned memory, of size batchSize, with 0 <= alpha <= 20 for each image in batch)
* \param [in] betaTensor beta values for brightness calculation (1D tensor in HOST/pinned memory, of size batchSize, with 0 <= beta <= 255 for each image in batch)
* \param [in] roiTensorPtrSrc ROI data in HOST/HIP memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
* \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
* \param [in] rppHandle RPP HOST/HIP handle created with <tt>\ref rppCreateWithBatchSize()</tt> / <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
RppStatus rppt_brightness_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *alphaTensor, Rpp32f *betaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);

#ifdef GPU_SUPPORT
/*! \brief Brightness augmentation on HIP backend for a NCHW/NHWC layout tensor
* \details The brightness augmentation changes brightness of a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.png Sample Input
* \image html color_augmentations_brightness_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
* \param [in] alphaTensor alpha values for brightness calculation (1D tensor in pinned/HOST memory, of size batchSize, with 0 <= alpha <= 20 for each image in batch)
* \param [in] betaTensor beta values for brightness calculation (1D tensor in pinned/HOST memory, of size batchSize, with 0 <= beta <= 255 for each image in batch)
* \param [in] roiTensorPtrSrc ROI data in HIP memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
* \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
* \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
RppStatus rppt_brightness_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *alphaTensor, Rpp32f *betaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
#endif // GPU_SUPPORT
RppStatus rppt_brightness(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *alphaTensor, Rpp32f *betaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);

/*! \brief Gamma correction augmentation on HOST backend for a NCHW/NHWC layout tensor
* \details The gamma correction augmentation does a non-linear gamma correction of a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
Expand Down
32 changes: 5 additions & 27 deletions include/rppt_tensor_effects_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -428,42 +428,20 @@ RppStatus rppt_ricap_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPt
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.png Sample Input
* \image html effects_augmentations_vignette_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcPtr source tensor in HIP/HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HOST memory
* \param [out] dstPtr destination tensor in HIP/HOST memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
* \param[in] vignetteIntensityTensor intensity values to quantify vignette effect (1D tensor of size batchSize with 0 < vignetteIntensityTensor[n] for each image in batch)
* \param [in] roiTensorPtrSrc ROI data in HOST memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
* \param [in] roiTensorPtrSrc ROI data in HIP/HOST memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
* \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
* \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
* \param [in] rppHandle RPP HIP/HOST handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt> / <tt>\ref rppCreateWithBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
// NOTE: Pixel mismatch of 5% is expected between HIP and HOST Tensor variations due to usage of fastexpavx() instead of exp() in HOST Tensor.
RppStatus rppt_vignette_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *vignetteIntensityTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);

#ifdef GPU_SUPPORT
/*! \brief Vignette augmentation on HIP backend for a NCHW/NHWC layout tensor
* \details The vignette augmentation adds a vignette effect for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.png Sample Input
* \image html effects_augmentations_vignette_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
* \param[in] vignetteIntensityTensor intensity values to quantify vignette effect (1D tensor of size batchSize with 0 < vignetteIntensityTensor[n] for each image in batch)
* \param [in] roiTensorPtrSrc ROI data in HIP memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
* \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
* \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
RppStatus rppt_vignette_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *vignetteIntensityTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
#endif // GPU_SUPPORT
RppStatus rppt_vignette(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *vignetteIntensityTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);

/*! \brief Jitter augmentation on HOST backend for a NCHW/NHWC layout tensor
* \details The jitter augmentation adds a jitter effect for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
Expand Down
6 changes: 5 additions & 1 deletion src/include/common/rpp/handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,14 @@ struct Handle : rppHandle
{
Handle();
Handle(size_t nBatchSize, Rpp32u numThreads = 0);
Handle(size_t nBatchSize, Rpp32u numThreads = 0, RppBackend backend = RPP_HIP_BACKEND);
Handle(Handle&&) noexcept;
~Handle();

InitHandle* GetInitHandle() const;
size_t GetBatchSize() const;
Rpp32u GetNumThreads() const;
RppBackend GetBackend() const;
void SetBatchSize(size_t bSize) const;
void rpp_destroy_object_host();
std::unique_ptr<HandleImpl> impl;
Expand All @@ -81,11 +83,13 @@ struct Handle : rppHandle
// Host handle related
Handle();
Handle(size_t nBatchSize, Rpp32u numThreads = 0);
Handle(size_t nBatchSize, Rpp32u numThreads = 0, RppBackend backend = RPP_HOST_BACKEND);
Handle(Handle&&) noexcept;
~Handle();
InitHandle* GetInitHandle() const;
size_t GetBatchSize() const;
Rpp32u GetNumThreads() const;
RppBackend GetBackend() const;
void SetBatchSize(size_t bSize) const;
void rpp_destroy_object_host();

Expand All @@ -94,7 +98,7 @@ struct Handle : rppHandle

// Device handle related
Handle(rppAcceleratorQueue_t stream);
Handle(rppAcceleratorQueue_t stream, size_t nBatchSize);
Handle(rppAcceleratorQueue_t stream, size_t nBatchSize, RppBackend backend);
void rpp_destroy_object_gpu();
rppAcceleratorQueue_t GetStream() const;
void SetStream(rppAcceleratorQueue_t streamID) const;
Expand Down
8 changes: 4 additions & 4 deletions src/modules/handle_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ extern "C" rppStatus_t rppCreate(rppHandle_t* handle)
return rpp::try_([&] { rpp::deref(handle) = new rpp::Handle(); });
}

/*! \brief Creates an RPP handle for a given batch size, thread count and backend.
 * \details Thin C ABI wrapper over the C++ rpp::Handle constructor.
 * \param [out] handle      location receiving the newly created handle
 * \param [in]  nBatchSize  number of images processed per call
 * \param [in]  numThreads  worker thread count (0 selects a default inside Handle)
 * \param [in]  backend     backend the handle targets (RPP_HOST_BACKEND / RPP_HIP_BACKEND)
 * \return rppStatusSuccess on success, an error status otherwise.
 */
extern "C" rppStatus_t rppCreateWithBatchSize(rppHandle_t* handle, size_t nBatchSize, Rpp32u numThreads, RppBackend backend)
{
    // rpp::try_ converts any exception thrown during construction into an
    // rppStatus_t error code, keeping the C ABI exception-free.
    return rpp::try_([&] { rpp::deref(handle) = new rpp::Handle(nBatchSize, numThreads, backend); });
}

extern "C" rppStatus_t rppDestroy(rppHandle_t handle)
Expand Down Expand Up @@ -88,9 +88,9 @@ extern "C" rppStatus_t rppCreateWithStream(rppHandle_t* handle, rppAcceleratorQu
return rpp::try_([&] { rpp::deref(handle) = new rpp::Handle(stream); });
}

/*! \brief Creates an RPP handle bound to an accelerator stream for a given batch size and backend.
 * \details Thin C ABI wrapper over the stream-taking C++ rpp::Handle constructor.
 * \param [out] handle      location receiving the newly created handle
 * \param [in]  stream      accelerator queue/stream the handle submits work to
 * \param [in]  nBatchSize  number of images processed per call
 * \param [in]  backend     backend the handle targets (RPP_HOST_BACKEND / RPP_HIP_BACKEND)
 * \return rppStatusSuccess on success, an error status otherwise.
 */
extern "C" rppStatus_t rppCreateWithStreamAndBatchSize(rppHandle_t* handle, rppAcceleratorQueue_t stream, size_t nBatchSize, RppBackend backend)
{
    // rpp::try_ converts any exception thrown during construction into an
    // rppStatus_t error code, keeping the C ABI exception-free.
    return rpp::try_([&] { rpp::deref(handle) = new rpp::Handle(stream, nBatchSize, backend); });
}

extern "C" rppStatus_t rppDestroyGPU(rppHandle_t handle)
Expand Down
9 changes: 8 additions & 1 deletion src/modules/handlehost.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ struct HandleImpl
{
size_t nBatchSize = 1;
Rpp32u numThreads = 0;
RppBackend backend = RPP_HOST_BACKEND;
InitHandle* initHandle = nullptr;

void PreInitializeBufferCPU()
Expand All @@ -51,13 +52,14 @@ struct HandleImpl
}
};

/*! \brief Constructs a HOST-side Handle.
 * \param batchSize  number of images processed per call
 * \param numThreads requested worker thread count; clamped to hardware concurrency,
 *                   0 selects one thread per batch element
 * \param backend    backend this handle targets (stored for later queries via GetBackend())
 */
Handle::Handle(size_t batchSize, Rpp32u numThreads, RppBackend backend) : impl(new HandleImpl())
{
    impl->nBatchSize = batchSize;
    // Never launch more threads than the hardware supports.
    numThreads = std::min(numThreads, std::thread::hardware_concurrency());
    // 0 means "one thread per batch element".
    // NOTE(review): this fallback is NOT clamped to hardware_concurrency —
    // confirm that batch sizes larger than the core count are intended to
    // oversubscribe the machine.
    if(numThreads == 0)
        numThreads = batchSize;
    impl->numThreads = numThreads;
    impl->backend = backend;
    impl->PreInitializeBufferCPU();
}

Expand Down Expand Up @@ -90,6 +92,11 @@ Rpp32u Handle::GetNumThreads() const
return this->impl->numThreads;
}

/*! \brief Returns the backend this handle was created for. */
RppBackend Handle::GetBackend() const
{
    return impl->backend;
}

void Handle::SetBatchSize(size_t bSize) const
{
this->impl->nBatchSize = bSize;
Expand Down
12 changes: 10 additions & 2 deletions src/modules/hip/handlehip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ struct HandleImpl
float profiling_result = 0.0;
size_t nBatchSize = 1;
Rpp32u numThreads = 0;
RppBackend backend = RppBackend::RPP_HIP_BACKEND;
InitHandle* initHandle = nullptr;

HandleImpl() : ctx(get_ctx()) {}
Expand Down Expand Up @@ -261,9 +262,10 @@ struct HandleImpl
}
};

Handle::Handle(rppAcceleratorQueue_t stream, size_t batchSize) : impl(new HandleImpl())
Handle::Handle(rppAcceleratorQueue_t stream, size_t batchSize, RppBackend backend) : impl(new HandleImpl())
{
impl->nBatchSize = batchSize;
impl->backend = backend;
this->impl->device = get_device_id();
this->impl->ctx = get_ctx();

Expand Down Expand Up @@ -292,13 +294,14 @@ Handle::Handle(rppAcceleratorQueue_t stream) : impl(new HandleImpl())
RPP_LOG_I(*this);
}

/*! \brief Constructs a Handle without a stream (HOST-style construction in the HIP build).
 * \param batchSize  number of images processed per call
 * \param numThreads requested worker thread count; clamped to hardware concurrency,
 *                   0 selects one thread per batch element
 * \param backend    backend this handle targets (stored for later queries via GetBackend())
 */
Handle::Handle(size_t batchSize, Rpp32u numThreads, RppBackend backend) : impl(new HandleImpl())
{
    impl->nBatchSize = batchSize;
    // Never launch more threads than the hardware supports.
    numThreads = std::min(numThreads, std::thread::hardware_concurrency());
    // 0 means "one thread per batch element".
    // NOTE(review): this fallback is NOT clamped to hardware_concurrency —
    // confirm oversubscription is intended for large batch sizes.
    if(numThreads == 0)
        numThreads = batchSize;
    impl->numThreads = numThreads;
    impl->backend = backend;
    // No custom allocator callbacks: use the default allocation behavior.
    this->SetAllocator(nullptr, nullptr, nullptr);
    impl->PreInitializeBufferCPU();
}
Expand Down Expand Up @@ -413,6 +416,11 @@ Rpp32u Handle::GetNumThreads() const
return this->impl->numThreads;
}

/*! \brief Returns the backend this handle was created for. */
RppBackend Handle::GetBackend() const
{
    return impl->backend;
}

void Handle::SetBatchSize(size_t bSize) const
{
this->impl->nBatchSize = bSize;
Expand Down
Loading