@@ -330,7 +330,7 @@ struct zmm_vector<double> {
330330 * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg 
331331 */  
332332template  <typename  vtype, typename  zmm_t  = typename  vtype::zmm_t >
333- X86_SIMD_SORT_FINLINE  zmm_t  sort_zmm_64bit (zmm_t  zmm)
333+ X86_SIMD_SORT_INLINE  zmm_t  sort_zmm_64bit (zmm_t  zmm)
334334{
335335    const  __m512i rev_index = _mm512_set_epi64 (NETWORK_64BIT_2);
336336    zmm = cmp_merge<vtype>(
@@ -353,7 +353,7 @@ X86_SIMD_SORT_FINLINE zmm_t sort_zmm_64bit(zmm_t zmm)
353353
354354//  Assumes zmm is bitonic and performs a recursive half cleaner
355355template  <typename  vtype, typename  zmm_t  = typename  vtype::zmm_t >
356- X86_SIMD_SORT_FINLINE  zmm_t  bitonic_merge_zmm_64bit (zmm_t  zmm)
356+ X86_SIMD_SORT_INLINE  zmm_t  bitonic_merge_zmm_64bit (zmm_t  zmm)
357357{
358358
359359    //  1) half_cleaner[8]: compare 0-4, 1-5, 2-6, 3-7
@@ -374,7 +374,7 @@ X86_SIMD_SORT_FINLINE zmm_t bitonic_merge_zmm_64bit(zmm_t zmm)
374374
375375//  Assumes zmm1 and zmm2 are sorted and performs a recursive half cleaner
376376template  <typename  vtype, typename  zmm_t  = typename  vtype::zmm_t >
377- X86_SIMD_SORT_FINLINE  void  bitonic_merge_two_zmm_64bit (zmm_t  &zmm1, zmm_t  &zmm2)
377+ X86_SIMD_SORT_INLINE  void  bitonic_merge_two_zmm_64bit (zmm_t  &zmm1, zmm_t  &zmm2)
378378{
379379    const  __m512i rev_index = _mm512_set_epi64 (NETWORK_64BIT_2);
380380    //  1) First step of a merging network: coex of zmm1 and zmm2 reversed
@@ -389,7 +389,7 @@ X86_SIMD_SORT_FINLINE void bitonic_merge_two_zmm_64bit(zmm_t &zmm1, zmm_t &zmm2)
389389//  Assumes [zmm0, zmm1] and [zmm2, zmm3] are sorted and performs a recursive
390390//  half cleaner
391391template  <typename  vtype, typename  zmm_t  = typename  vtype::zmm_t >
392- X86_SIMD_SORT_FINLINE  void  bitonic_merge_four_zmm_64bit (zmm_t  *zmm)
392+ X86_SIMD_SORT_INLINE  void  bitonic_merge_four_zmm_64bit (zmm_t  *zmm)
393393{
394394    const  __m512i rev_index = _mm512_set_epi64 (NETWORK_64BIT_2);
395395    //  1) First step of a merging network
@@ -411,7 +411,7 @@ X86_SIMD_SORT_FINLINE void bitonic_merge_four_zmm_64bit(zmm_t *zmm)
411411}
412412
413413template  <typename  vtype, typename  zmm_t  = typename  vtype::zmm_t >
414- X86_SIMD_SORT_FINLINE  void  bitonic_merge_eight_zmm_64bit (zmm_t  *zmm)
414+ X86_SIMD_SORT_INLINE  void  bitonic_merge_eight_zmm_64bit (zmm_t  *zmm)
415415{
416416    const  __m512i rev_index = _mm512_set_epi64 (NETWORK_64BIT_2);
417417    zmm_t  zmm4r = vtype::permutexvar (rev_index, zmm[4 ]);
@@ -445,7 +445,7 @@ X86_SIMD_SORT_FINLINE void bitonic_merge_eight_zmm_64bit(zmm_t *zmm)
445445}
446446
447447template  <typename  vtype, typename  zmm_t  = typename  vtype::zmm_t >
448- X86_SIMD_SORT_FINLINE  void  bitonic_merge_sixteen_zmm_64bit (zmm_t  *zmm)
448+ X86_SIMD_SORT_INLINE  void  bitonic_merge_sixteen_zmm_64bit (zmm_t  *zmm)
449449{
450450    const  __m512i rev_index = _mm512_set_epi64 (NETWORK_64BIT_2);
451451    zmm_t  zmm8r = vtype::permutexvar (rev_index, zmm[8 ]);
@@ -519,7 +519,7 @@ X86_SIMD_SORT_FINLINE void bitonic_merge_sixteen_zmm_64bit(zmm_t *zmm)
519519}
520520
521521template  <typename  vtype, typename  type_t >
522- X86_SIMD_SORT_FINLINE  void  sort_8_64bit (type_t  *arr, int32_t  N)
522+ X86_SIMD_SORT_INLINE  void  sort_8_64bit (type_t  *arr, int32_t  N)
523523{
524524    typename  vtype::opmask_t  load_mask = (0x01  << N) - 0x01 ;
525525    typename  vtype::zmm_t  zmm
@@ -528,7 +528,7 @@ X86_SIMD_SORT_FINLINE void sort_8_64bit(type_t *arr, int32_t N)
528528}
529529
530530template  <typename  vtype, typename  type_t >
531- X86_SIMD_SORT_FINLINE  void  sort_16_64bit (type_t  *arr, int32_t  N)
531+ X86_SIMD_SORT_INLINE  void  sort_16_64bit (type_t  *arr, int32_t  N)
532532{
533533    if  (N <= 8 ) {
534534        sort_8_64bit<vtype>(arr, N);
@@ -546,7 +546,7 @@ X86_SIMD_SORT_FINLINE void sort_16_64bit(type_t *arr, int32_t N)
546546}
547547
548548template  <typename  vtype, typename  type_t >
549- X86_SIMD_SORT_FINLINE  void  sort_32_64bit (type_t  *arr, int32_t  N)
549+ X86_SIMD_SORT_INLINE  void  sort_32_64bit (type_t  *arr, int32_t  N)
550550{
551551    if  (N <= 16 ) {
552552        sort_16_64bit<vtype>(arr, N);
@@ -577,7 +577,7 @@ X86_SIMD_SORT_FINLINE void sort_32_64bit(type_t *arr, int32_t N)
577577}
578578
579579template  <typename  vtype, typename  type_t >
580- X86_SIMD_SORT_FINLINE  void  sort_64_64bit (type_t  *arr, int32_t  N)
580+ X86_SIMD_SORT_INLINE  void  sort_64_64bit (type_t  *arr, int32_t  N)
581581{
582582    if  (N <= 32 ) {
583583        sort_32_64bit<vtype>(arr, N);
@@ -628,7 +628,7 @@ X86_SIMD_SORT_FINLINE void sort_64_64bit(type_t *arr, int32_t N)
628628}
629629
630630template  <typename  vtype, typename  type_t >
631- X86_SIMD_SORT_FINLINE  void  sort_128_64bit (type_t  *arr, int32_t  N)
631+ X86_SIMD_SORT_INLINE  void  sort_128_64bit (type_t  *arr, int32_t  N)
632632{
633633    if  (N <= 64 ) {
634634        sort_64_64bit<vtype>(arr, N);
@@ -718,9 +718,9 @@ X86_SIMD_SORT_FINLINE void sort_128_64bit(type_t *arr, int32_t N)
718718}
719719
720720template  <typename  vtype, typename  type_t >
721- X86_SIMD_SORT_FINLINE  type_t  get_pivot_64bit (type_t  *arr,
722-                                               const  int64_t  left,
723-                                               const  int64_t  right)
721+ X86_SIMD_SORT_INLINE  type_t  get_pivot_64bit (type_t  *arr,
722+                                             const  int64_t  left,
723+                                             const  int64_t  right)
724724{
725725    //  median of 8
726726    int64_t  size = (right - left) / 8 ;
@@ -769,7 +769,7 @@ qsort_64bit_(type_t *arr, int64_t left, int64_t right, int64_t max_iters)
769769        qsort_64bit_<vtype>(arr, pivot_index, right, max_iters - 1 );
770770}
771771
772- X86_SIMD_SORT_FINLINE  int64_t  replace_nan_with_inf (double  *arr, int64_t  arrsize)
772+ X86_SIMD_SORT_INLINE  int64_t  replace_nan_with_inf (double  *arr, int64_t  arrsize)
773773{
774774    int64_t  nan_count = 0 ;
775775    __mmask8 loadmask = 0xFF ;
@@ -785,7 +785,7 @@ X86_SIMD_SORT_FINLINE int64_t replace_nan_with_inf(double *arr, int64_t arrsize)
785785    return  nan_count;
786786}
787787
788- X86_SIMD_SORT_FINLINE  void 
788+ X86_SIMD_SORT_INLINE  void 
789789replace_inf_with_nan (double  *arr, int64_t  arrsize, int64_t  nan_count)
790790{
791791    for  (int64_t  ii = arrsize - 1 ; nan_count > 0 ; --ii) {
0 commit comments