@@ -330,7 +330,7 @@ struct zmm_vector<double> {
  * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg
  */
 template <typename vtype, typename zmm_t = typename vtype::zmm_t>
-X86_SIMD_SORT_FINLINE zmm_t sort_zmm_64bit(zmm_t zmm)
+X86_SIMD_SORT_INLINE zmm_t sort_zmm_64bit(zmm_t zmm)
 {
     const __m512i rev_index = _mm512_set_epi64(NETWORK_64BIT_2);
     zmm = cmp_merge<vtype>(
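
Note: the change in every hunk of this diff is the same rename, dropping the forced-inline macro in favor of the plain-inline one. The exact macro definitions live in the project's common header; a minimal sketch of what they plausibly expand to (an assumption, for illustration only):

// Hypothetical definitions; the real ones are in the shared header and may differ.
#if defined(_MSC_VER)
#define X86_SIMD_SORT_FINLINE static __forceinline
#define X86_SIMD_SORT_INLINE static inline
#elif defined(__GNUC__) || defined(__clang__)
#define X86_SIMD_SORT_FINLINE static inline __attribute__((always_inline))
#define X86_SIMD_SORT_INLINE static inline
#endif

Under that reading, the rename leaves the inlining decision of these fairly large sorting-network bodies to the compiler instead of forcing it, which trades a possible call for smaller code.
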
@@ -353,7 +353,7 @@ X86_SIMD_SORT_FINLINE zmm_t sort_zmm_64bit(zmm_t zmm)
 
 // Assumes zmm is bitonic and performs a recursive half cleaner
 template <typename vtype, typename zmm_t = typename vtype::zmm_t>
-X86_SIMD_SORT_FINLINE zmm_t bitonic_merge_zmm_64bit(zmm_t zmm)
+X86_SIMD_SORT_INLINE zmm_t bitonic_merge_zmm_64bit(zmm_t zmm)
 {
 
     // 1) half_cleaner[8]: compare 0-4, 1-5, 2-6, 3-7
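
For context on the "recursive half cleaner" this comment names: a scalar sketch of the same network over a plain 8-element array, assuming ascending order (in the vectorized code, each loop iteration becomes roughly one shuffle plus one cmp_merge):

#include <algorithm>

// Scalar model of half_cleaner[8] -> half_cleaner[4] -> half_cleaner[2]:
// sorts an 8-element bitonic sequence ascending.
inline void half_cleaner_8(double (&v)[8])
{
    for (int step = 4; step >= 1; step /= 2) {
        for (int i = 0; i < 8; ++i) {
            if ((i & step) == 0 && v[i] > v[i + step])
                std::swap(v[i], v[i + step]); // compare-exchange i, i+step
        }
    }
}
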
@@ -374,7 +374,7 @@ X86_SIMD_SORT_FINLINE zmm_t bitonic_merge_zmm_64bit(zmm_t zmm)
 
 // Assumes zmm1 and zmm2 are sorted and performs a recursive half cleaner
 template <typename vtype, typename zmm_t = typename vtype::zmm_t>
-X86_SIMD_SORT_FINLINE void bitonic_merge_two_zmm_64bit(zmm_t &zmm1, zmm_t &zmm2)
+X86_SIMD_SORT_INLINE void bitonic_merge_two_zmm_64bit(zmm_t &zmm1, zmm_t &zmm2)
 {
     const __m512i rev_index = _mm512_set_epi64(NETWORK_64BIT_2);
     // 1) First step of a merging network: coex of zmm1 and zmm2 reversed
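
The two-vector merge follows the classic bitonic recipe: reverse the second sorted vector so the concatenation is bitonic, compare-exchange lane-wise, then half-clean each half. A scalar sketch under that assumption, reusing half_cleaner_8 from the sketch above:

#include <algorithm>

// Scalar model of bitonic_merge_two_zmm_64bit: a and b are each sorted
// ascending on entry; on exit a holds the 8 smallest, b the 8 largest.
inline void bitonic_merge_two_8(double (&a)[8], double (&b)[8])
{
    std::reverse(std::begin(b), std::end(b)); // make [a, reverse(b)] bitonic
    for (int i = 0; i < 8; ++i) {             // coex: min into a, max into b
        double lo = std::min(a[i], b[i]);
        double hi = std::max(a[i], b[i]);
        a[i] = lo;
        b[i] = hi;
    }
    half_cleaner_8(a); // each half is now bitonic; clean to fully sorted
    half_cleaner_8(b);
}
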
@@ -389,7 +389,7 @@ X86_SIMD_SORT_FINLINE void bitonic_merge_two_zmm_64bit(zmm_t &zmm1, zmm_t &zmm2)
 // Assumes [zmm0, zmm1] and [zmm2, zmm3] are sorted and performs a recursive
 // half cleaner
 template <typename vtype, typename zmm_t = typename vtype::zmm_t>
-X86_SIMD_SORT_FINLINE void bitonic_merge_four_zmm_64bit(zmm_t *zmm)
+X86_SIMD_SORT_INLINE void bitonic_merge_four_zmm_64bit(zmm_t *zmm)
 {
     const __m512i rev_index = _mm512_set_epi64(NETWORK_64BIT_2);
     // 1) First step of a merging network
@@ -411,7 +411,7 @@ X86_SIMD_SORT_FINLINE void bitonic_merge_four_zmm_64bit(zmm_t *zmm)
 }
 
 template <typename vtype, typename zmm_t = typename vtype::zmm_t>
-X86_SIMD_SORT_FINLINE void bitonic_merge_eight_zmm_64bit(zmm_t *zmm)
+X86_SIMD_SORT_INLINE void bitonic_merge_eight_zmm_64bit(zmm_t *zmm)
 {
     const __m512i rev_index = _mm512_set_epi64(NETWORK_64BIT_2);
     zmm_t zmm4r = vtype::permutexvar(rev_index, zmm[4]);
@@ -445,7 +445,7 @@ X86_SIMD_SORT_FINLINE void bitonic_merge_eight_zmm_64bit(zmm_t *zmm)
 }
 
 template <typename vtype, typename zmm_t = typename vtype::zmm_t>
-X86_SIMD_SORT_FINLINE void bitonic_merge_sixteen_zmm_64bit(zmm_t *zmm)
+X86_SIMD_SORT_INLINE void bitonic_merge_sixteen_zmm_64bit(zmm_t *zmm)
 {
     const __m512i rev_index = _mm512_set_epi64(NETWORK_64BIT_2);
     zmm_t zmm8r = vtype::permutexvar(rev_index, zmm[8]);
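
The four-, eight-, and sixteen-vector merges touched above all repeat the same shape: compare element i against element n-1-i across the two sorted halves (the "reverse and coex" step in the vector code), then run half cleaners on each half. A scalar model of that merge over a flat array, an illustration rather than the file's actual code:

#include <algorithm>

// Scalar model of the wider merges: v[0..n/2) and v[n/2..n) are each
// sorted ascending; n is a power of two. On exit v is fully sorted.
inline void bitonic_merge_scalar(double *v, int n)
{
    for (int i = 0; i < n / 2; ++i) {         // first step: i vs n-1-i
        if (v[i] > v[n - 1 - i]) std::swap(v[i], v[n - 1 - i]);
    }
    for (int step = n / 4; step >= 1; step /= 2) { // then half cleaners
        for (int i = 0; i < n; ++i) {
            if ((i & step) == 0 && v[i] > v[i + step])
                std::swap(v[i], v[i + step]);
        }
    }
}
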
@@ -519,7 +519,7 @@ X86_SIMD_SORT_FINLINE void bitonic_merge_sixteen_zmm_64bit(zmm_t *zmm)
 }
 
 template <typename vtype, typename type_t>
-X86_SIMD_SORT_FINLINE void sort_8_64bit(type_t *arr, int32_t N)
+X86_SIMD_SORT_INLINE void sort_8_64bit(type_t *arr, int32_t N)
 {
     typename vtype::opmask_t load_mask = (0x01 << N) - 0x01;
     typename vtype::zmm_t zmm
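
sort_8_64bit handles N < 8 with a masked load: only the low N lanes are read, and the unused lanes are padded with the type's max so they sink to the back of the network. A hedged sketch of that pattern for double with raw AVX-512 intrinsics (the real code goes through the vtype wrappers, and padding with +inf here is an assumption):

#include <immintrin.h>
#include <cstdint>
#include <limits>

// Hypothetical helper: read N (1..8) doubles, pad the remaining lanes
// with +inf so they land at the end after sorting.
inline __m512d masked_load_pad_inf(const double *arr, int32_t N)
{
    __mmask8 load_mask = (__mmask8)((0x01 << N) - 0x01);
    __m512d fill = _mm512_set1_pd(std::numeric_limits<double>::infinity());
    return _mm512_mask_loadu_pd(fill, load_mask, arr);
}

The matching store is masked the same way (_mm512_mask_storeu_pd), so the bytes past arr[N-1] are never written.
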
@@ -528,7 +528,7 @@ X86_SIMD_SORT_FINLINE void sort_8_64bit(type_t *arr, int32_t N)
 }
 
 template <typename vtype, typename type_t>
-X86_SIMD_SORT_FINLINE void sort_16_64bit(type_t *arr, int32_t N)
+X86_SIMD_SORT_INLINE void sort_16_64bit(type_t *arr, int32_t N)
 {
     if (N <= 8) {
         sort_8_64bit<vtype>(arr, N);
@@ -546,7 +546,7 @@ X86_SIMD_SORT_FINLINE void sort_16_64bit(type_t *arr, int32_t N)
 }
 
 template <typename vtype, typename type_t>
-X86_SIMD_SORT_FINLINE void sort_32_64bit(type_t *arr, int32_t N)
+X86_SIMD_SORT_INLINE void sort_32_64bit(type_t *arr, int32_t N)
 {
     if (N <= 16) {
         sort_16_64bit<vtype>(arr, N);
@@ -577,7 +577,7 @@ X86_SIMD_SORT_FINLINE void sort_32_64bit(type_t *arr, int32_t N)
 }
 
 template <typename vtype, typename type_t>
-X86_SIMD_SORT_FINLINE void sort_64_64bit(type_t *arr, int32_t N)
+X86_SIMD_SORT_INLINE void sort_64_64bit(type_t *arr, int32_t N)
 {
     if (N <= 32) {
         sort_32_64bit<vtype>(arr, N);
@@ -628,7 +628,7 @@ X86_SIMD_SORT_FINLINE void sort_64_64bit(type_t *arr, int32_t N)
 }
 
 template <typename vtype, typename type_t>
-X86_SIMD_SORT_FINLINE void sort_128_64bit(type_t *arr, int32_t N)
+X86_SIMD_SORT_INLINE void sort_128_64bit(type_t *arr, int32_t N)
 {
     if (N <= 64) {
         sort_64_64bit<vtype>(arr, N);
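
The sort_{8,16,32,64,128}_64bit helpers form a recursive-doubling ladder: each falls through to the next smaller helper when N permits, otherwise loads its vectors, sorts each, and merges. A hypothetical sketch of how a quicksort base case would use the largest rung (the driver name and the exact cutoff are assumptions; the real recursion is in qsort_64bit_, whose hunk follows below):

#include <cstdint>

// Hypothetical driver: partitions up to 128 elements go straight to the
// network sort; anything bigger is partitioned and recursed.
template <typename vtype, typename type_t>
void qsort_sketch(type_t *arr, int64_t left, int64_t right)
{
    if (right + 1 - left <= 128) {
        sort_128_64bit<vtype>(arr + left, (int32_t)(right + 1 - left));
        return;
    }
    // ... partition around get_pivot_64bit and recurse on both sides ...
}
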
@@ -718,9 +718,9 @@ X86_SIMD_SORT_FINLINE void sort_128_64bit(type_t *arr, int32_t N)
 }
 
 template <typename vtype, typename type_t>
-X86_SIMD_SORT_FINLINE type_t get_pivot_64bit(type_t *arr,
-                                             const int64_t left,
-                                             const int64_t right)
+X86_SIMD_SORT_INLINE type_t get_pivot_64bit(type_t *arr,
+                                            const int64_t left,
+                                            const int64_t right)
 {
     // median of 8
     int64_t size = (right - left) / 8;
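
The pivot is the median of eight samples spaced size = (right - left) / 8 apart. A scalar stand-in for the same idea, using std::nth_element in place of the vectorized sorting network (sample spacing and edge cases are simplified here):

#include <algorithm>
#include <cstdint>

// Scalar model of get_pivot_64bit: median of 8 evenly spaced samples.
template <typename type_t>
type_t median_of_8(type_t *arr, int64_t left, int64_t right)
{
    int64_t size = (right - left) / 8;
    type_t samples[8];
    for (int i = 0; i < 8; ++i)
        samples[i] = arr[left + (i + 1) * size];
    std::nth_element(samples, samples + 4, samples + 8);
    return samples[4]; // the real code extracts this via a sorting network
}
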
@@ -769,7 +769,7 @@ qsort_64bit_(type_t *arr, int64_t left, int64_t right, int64_t max_iters)
     qsort_64bit_<vtype>(arr, pivot_index, right, max_iters - 1);
 }
 
-X86_SIMD_SORT_FINLINE int64_t replace_nan_with_inf(double *arr, int64_t arrsize)
+X86_SIMD_SORT_INLINE int64_t replace_nan_with_inf(double *arr, int64_t arrsize)
 {
     int64_t nan_count = 0;
     __mmask8 loadmask = 0xFF;
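
replace_nan_with_inf walks the array a vector at a time, detects NaN lanes with an unordered self-compare (NaN is the only value for which x == x is false), rewrites them as +inf, and counts them for later restoration. A hedged sketch of that detection for one full vector, with raw intrinsics instead of the file's wrappers:

#include <immintrin.h>
#include <cstdint>
#include <limits>

// Sketch of the NaN-to-inf pass for one vector of 8 valid doubles.
inline int64_t flush_nans_to_inf(double *chunk)
{
    __m512d in = _mm512_loadu_pd(chunk);
    __mmask8 nanmask = _mm512_cmp_pd_mask(in, in, _CMP_UNORD_Q);
    __m512d inf = _mm512_set1_pd(std::numeric_limits<double>::infinity());
    _mm512_mask_storeu_pd(chunk, nanmask, inf); // only NaN lanes rewritten
    return (int64_t)_mm_popcnt_u32((unsigned)nanmask);
}
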
@@ -785,7 +785,7 @@ X86_SIMD_SORT_FINLINE int64_t replace_nan_with_inf(double *arr, int64_t arrsize)
     return nan_count;
 }
 
-X86_SIMD_SORT_FINLINE void
+X86_SIMD_SORT_INLINE void
 replace_inf_with_nan(double *arr, int64_t arrsize, int64_t nan_count)
 {
     for (int64_t ii = arrsize - 1; nan_count > 0; --ii) {
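
Together the two passes bracket the sort: NaNs become +inf so comparisons stay total, the array is sorted, and the trailing +inf slots are turned back into NaN. A sketch of that plausible call order (the wrapper name is hypothetical; the library's actual entry point for doubles wires these up internally):

#include <cstdint>

// Hypothetical wrapper showing how the two helpers bracket the core sort.
void qsort_double_sketch(double *arr, int64_t arrsize)
{
    int64_t nan_count = replace_nan_with_inf(arr, arrsize);
    // ... run the AVX-512 quicksort over [0, arrsize) ...
    replace_inf_with_nan(arr, arrsize, nan_count); // NaNs end up at the back
}
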