-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Log1p #377
base: develop
Are you sure you want to change the base?
Log1p #377
Conversation
HOST Implementation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ManasaDattaT please address the review comments
* \retval RPP_SUCCESS Successful completion. | ||
* \retval RPP_ERROR* Unsuccessful completion. | ||
*/ | ||
RppStatus rppt_log1p_gpu(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDescPtr, RppPtr_t dstPtr, RpptGenericDescPtr dstGenericDescPtr, Rpp32u *roiTensor, rppHandle_t rppHandle); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please check the gpu declaration, it is repeated twice
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
src/include/cpu/rpp_cpu_simd.hpp
Outdated
@@ -148,6 +149,8 @@ const __m128i xmm_pxMask04To07 = _mm_setr_epi8(4, 0x80, 0x80, 0x80, 5, 0x80, 0x8 | |||
const __m128i xmm_pxMask08To11 = _mm_setr_epi8(8, 0x80, 0x80, 0x80, 9, 0x80, 0x80, 0x80, 10, 0x80, 0x80, 0x80, 11, 0x80, 0x80, 0x80); | |||
const __m128i xmm_pxMask12To15 = _mm_setr_epi8(12, 0x80, 0x80, 0x80, 13, 0x80, 0x80, 0x80, 14, 0x80, 0x80, 0x80, 15, 0x80, 0x80, 0x80); | |||
|
|||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please remove these extra spaces
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
src/include/cpu/rpp_cpu_simd.hpp
Outdated
@@ -2693,6 +2713,8 @@ static inline __m256 log_ps(__m256 x) | |||
__m256 one = *(__m256 *)&avx_p1; | |||
__m256 invalid_mask = _mm256_cmp_ps(x, avx_p0, _CMP_LE_OQ); | |||
|
|||
// x = _mm256_add_ps(x, one); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please remove the commented-out lines, if these are not necessary
@@ -464,6 +464,18 @@ __device__ __forceinline__ float rpp_hip_unpack3(int src) | |||
{ | |||
return (float)(schar)((src >> 24) & 0xFF); | |||
} | |||
// Un-Packing from I16s | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove blank line at L468
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
else if ((srcGenericDescPtr->dataType == RpptDataType::I8) && (dstGenericDescPtr->dataType == RpptDataType::I8)) return RPP_ERROR_INVALID_DST_DATATYPE; | ||
else if ((srcGenericDescPtr->dataType == RpptDataType::I16) && (dstGenericDescPtr->dataType == RpptDataType::F32)) | ||
{ | ||
log1p_generic_host_tensor(static_cast<Rpp16s *>(srcPtr) + srcGenericDescPtr->offsetInBytes, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please check the alignment here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
@@ -559,4 +594,66 @@ RppStatus rppt_log_gpu(RppPtr_t srcPtr, | |||
#endif // backend | |||
} | |||
|
|||
RppStatus rppt_log1p_gpu(RppPtr_t srcPtr, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
check alignment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
set_generic_descriptor(srcDescriptorPtrND, nDim, offSetInBytes, bitDepth, batchSize, roiTensor); | ||
set_generic_descriptor(dstDescriptorPtrND, nDim, offSetInBytes, bitDepth, batchSize, roiTensor); | ||
set_generic_descriptor(dstDescriptorPtrND, nDim, offSetInBytes, 2, batchSize, roiTensor); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess you have hardcoded the value for your use case, please revert these changes
@@ -129,7 +142,7 @@ void fill_roi_values(Rpp32u nDim, Rpp32u batchSize, Rpp32u *roiTensor, bool qaMo | |||
} | |||
case 4: | |||
{ | |||
std::array<Rpp32u, 8> roi = {0, 0, 0, 0, 1, 128, 128, 128}; | |||
std::array<Rpp32u, 8> roi = {0, 0, 0, 0, 128, 128, 128, 4}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please revert the changes
@@ -369,8 +384,14 @@ void compare_output(Rpp32f *outputF32, Rpp32u nDim, Rpp32u batchSize, Rpp32u buf | |||
for(int j = 0; j < sampleLength; j++) | |||
{ | |||
bool invalid_comparision = ((out[j] == 0.0f) && (ref[j] != 0.0f)); | |||
if(!invalid_comparision && abs(out[j] - ref[j]) < 1e-4) | |||
cnt++; | |||
if(!invalid_comparision ) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please revert these changes
Host and HIP implementation of log1p , taken reference from log
In HOST in AVX2, for 3d and 4d combined the dimension to vectorize.
In HIP for log in 3d implementation as there is usage of multiple kernel launches, used Nd for 3d as well in log1p
And some changes were hardcoded in the test suite to make it able to run for I16 datatype, which are not meant to be pushed but just to evaluate the QA test.