Rebase with main and fix formatting

r-devulap · r-devulap · commit 2b913b813cc9 · 2024-02-21T13:38:20.000-08:00
diff --git a/src/xss-common-qsort.h b/src/xss-common-qsort.h
@@ -87,7 +87,8 @@ X86_SIMD_SORT_INLINE bool array_has_nan(type_t *arr, arrsize_t size)
         else {
             in = vtype::loadu(arr + ii);
         }
-        auto nanmask = vtype::convert_mask_to_int(vtype::template fpclass<0x01 | 0x80>(in));
+        auto nanmask = vtype::convert_mask_to_int(
+                vtype::template fpclass<0x01 | 0x80>(in));
         if (nanmask != 0x00) {
             found_nan = true;
             break;
@@ -498,24 +499,20 @@ qsort_(type_t *arr, arrsize_t left, arrsize_t right, arrsize_t max_iters)
                 arr + left, (int32_t)(right + 1 - left));
         return;
     }
-    
+
     auto pivot_result = get_pivot_smart<vtype, type_t>(arr, left, right);
     type_t pivot = pivot_result.pivot;
-    
-    if (pivot_result.result == pivot_result_t::Sorted){
-        return;
-    }
-    
+
+    if (pivot_result.result == pivot_result_t::Sorted) { return; }
+
     type_t smallest = vtype::type_max();
     type_t biggest = vtype::type_min();
 
     arrsize_t pivot_index
             = partition_avx512_unrolled<vtype, vtype::partition_unroll_factor>(
                     arr, left, right + 1, pivot, &smallest, &biggest);
-    
-    if (pivot_result.result == pivot_result_t::Only2Values){
-        return;
-    }
+
+    if (pivot_result.result == pivot_result_t::Only2Values) { return; }
 
     if (pivot != smallest)
         qsort_<vtype>(arr, left, pivot_index - 1, max_iters - 1);
diff --git a/src/xss-network-keyvaluesort.hpp b/src/xss-network-keyvaluesort.hpp
@@ -441,9 +441,8 @@ bitonic_fullmerge_n_vec(typename keyType::reg_t *keys,
 }
 
 template <typename keyType, typename indexType, int numVecs>
-X86_SIMD_SORT_INLINE void argsort_n_vec(typename keyType::type_t *keys,
-                                        arrsize_t *indices,
-                                        int N)
+X86_SIMD_SORT_INLINE void
+argsort_n_vec(typename keyType::type_t *keys, arrsize_t *indices, int N)
 {
     using kreg_t = typename keyType::reg_t;
     using ireg_t = typename indexType::reg_t;
@@ -586,9 +585,8 @@ X86_SIMD_SORT_INLINE void kvsort_n_vec(typename keyType::type_t *keys,
 }
 
 template <typename keyType, typename indexType, int maxN>
-X86_SIMD_SORT_INLINE void argsort_n(typename keyType::type_t *keys,
-                                    arrsize_t *indices,
-                                    int N)
+X86_SIMD_SORT_INLINE void
+argsort_n(typename keyType::type_t *keys, arrsize_t *indices, int N)
 {
     static_assert(keyType::numlanes == indexType::numlanes,
                   "invalid pairing of value/index types");
diff --git a/src/xss-network-qsort.hpp b/src/xss-network-qsort.hpp
@@ -144,7 +144,8 @@ X86_SIMD_SORT_FINLINE void merge_n_vec(reg_t *regs)
 }
 
 template <typename vtype, int numVecs, typename reg_t = typename vtype::reg_t>
-X86_SIMD_SORT_FINLINE void sort_vectors(reg_t * vecs){
+X86_SIMD_SORT_FINLINE void sort_vectors(reg_t *vecs)
+{
     /* Run the initial sorting network to sort the columns of the [numVecs x
      * num_lanes] matrix
      */
@@ -188,7 +189,7 @@ X86_SIMD_SORT_INLINE void sort_n_vec(typename vtype::type_t *arr, int N)
         vecs[i] = vtype::mask_loadu(
                 vtype::zmm_max(), ioMasks[j], arr + i * vtype::numlanes);
     }
-    
+
     sort_vectors<vtype, numVecs>(vecs);
 
     // Unmasked part of the store
diff --git a/src/xss-pivot-selection.hpp b/src/xss-pivot-selection.hpp
@@ -6,26 +6,29 @@
 enum class pivot_result_t : int { Normal, Sorted, Only2Values };
 
 template <typename type_t>
-struct pivot_results{
-    
+struct pivot_results {
+
     pivot_result_t result = pivot_result_t::Normal;
     type_t pivot = 0;
-    
-    pivot_results(type_t _pivot, pivot_result_t _result = pivot_result_t::Normal){
+
+    pivot_results(type_t _pivot,
+                  pivot_result_t _result = pivot_result_t::Normal)
+    {
         pivot = _pivot;
         result = _result;
     }
 };
 
 template <typename type_t>
-type_t next_value(type_t value){
+type_t next_value(type_t value)
+{
     // TODO this probably handles non-native float16 wrong
-    if constexpr (std::is_floating_point<type_t>::value){
+    if constexpr (std::is_floating_point<type_t>::value) {
         return std::nextafter(value, std::numeric_limits<type_t>::infinity());
-    }else{
-        if (value < std::numeric_limits<type_t>::max()){
-            return value + 1;
-        }else{
+    }
+    else {
+        if (value < std::numeric_limits<type_t>::max()) { return value + 1; }
+        else {
             return value;
         }
     }
@@ -96,23 +99,23 @@ X86_SIMD_SORT_INLINE type_t get_pivot_blocks(type_t *arr,
 }
 
 template <typename vtype, typename type_t>
-X86_SIMD_SORT_INLINE pivot_results<type_t> get_pivot_near_constant(type_t *arr,
-                                             type_t commonValue,
-                                             const arrsize_t left,
-                                             const arrsize_t right);
+X86_SIMD_SORT_INLINE pivot_results<type_t>
+get_pivot_near_constant(type_t *arr,
+                        type_t commonValue,
+                        const arrsize_t left,
+                        const arrsize_t right);
 
 template <typename vtype, typename type_t>
-X86_SIMD_SORT_INLINE pivot_results<type_t> get_pivot_smart(type_t *arr,
-                                             const arrsize_t left,
-                                             const arrsize_t right)
+X86_SIMD_SORT_INLINE pivot_results<type_t>
+get_pivot_smart(type_t *arr, const arrsize_t left, const arrsize_t right)
 {
     using reg_t = typename vtype::reg_t;
     constexpr int numVecs = 4;
-    
-    if (right - left + 1 <= 4 * numVecs * vtype::numlanes){
-        return pivot_results<type_t>(get_pivot<vtype>(arr, left, right)); 
+
+    if (right - left + 1 <= 4 * numVecs * vtype::numlanes) {
+        return pivot_results<type_t>(get_pivot<vtype>(arr, left, right));
     }
-    
+
     constexpr int N = numVecs * vtype::numlanes;
 
     arrsize_t width = (right - vtype::numlanes) - left;
@@ -122,111 +125,123 @@ X86_SIMD_SORT_INLINE pivot_results<type_t> get_pivot_smart(type_t *arr,
     for (int i = 0; i < numVecs; i++) {
         vecs[i] = vtype::loadu(arr + left + delta * i);
     }
-    
+
     // Sort the samples
     sort_vectors<vtype, numVecs>(vecs);
-    
+
     type_t samples[N];
-    for (int i = 0; i < numVecs; i++){
+    for (int i = 0; i < numVecs; i++) {
         vtype::storeu(samples + vtype::numlanes * i, vecs[i]);
     }
-    
+
     type_t smallest = samples[0];
     type_t largest = samples[N - 1];
     type_t median = samples[N / 2];
-    
-    if (smallest == largest){
+
+    if (smallest == largest) {
         // We have a very unlucky sample, or the array is constant / near constant
         // Run a special function meant to deal with this situation
         return get_pivot_near_constant<vtype, type_t>(arr, median, left, right);
-    }else if (median != smallest && median != largest){
+    }
+    else if (median != smallest && median != largest) {
         // We have a normal sample; use it's median
         return pivot_results<type_t>(median);
-    }else if (median == smallest){
+    }
+    else if (median == smallest) {
         // If median == smallest, that implies approximately half the array is equal to smallest, unless we were very unlucky with our sample
         // Try just doing the next largest value greater than this seemingly very common value to seperate them out
         return pivot_results<type_t>(next_value<type_t>(median));
-    }else if (median == largest){
+    }
+    else if (median == largest) {
         // If median == largest, that implies approximately half the array is equal to largest, unless we were very unlucky with our sample
         // Thus, median probably is a fine pivot, since it will move all of this common value into its own partition
         return pivot_results<type_t>(median);
-    }else{
+    }
+    else {
         // Should be unreachable
         return pivot_results<type_t>(median);
     }
-    
+
     // Should be unreachable
     return pivot_results<type_t>(median);
 }
 
 // Handles the case where we seem to have a near-constant array, since our sample of the array was constant
 template <typename vtype, typename type_t>
-X86_SIMD_SORT_INLINE pivot_results<type_t> get_pivot_near_constant(type_t *arr,
-                                             type_t commonValue,
-                                             const arrsize_t left,
-                                             const arrsize_t right)
+X86_SIMD_SORT_INLINE pivot_results<type_t>
+get_pivot_near_constant(type_t *arr,
+                        type_t commonValue,
+                        const arrsize_t left,
+                        const arrsize_t right)
 {
     using reg_t = typename vtype::reg_t;
-    
+
     arrsize_t index = left;
-    
+
     type_t value1 = 0;
     type_t value2 = 0;
-    
+
     // First, search for any value not equal to the common value
     // First vectorized
     reg_t commonVec = vtype::set1(commonValue);
-    for (; index <= right - vtype::numlanes; index += vtype::numlanes){
+    for (; index <= right - vtype::numlanes; index += vtype::numlanes) {
         reg_t data = vtype::loadu(arr + index);
-        if (!vtype::all_false(vtype::knot_opmask(vtype::eq(data, commonVec)))){
+        if (!vtype::all_false(vtype::knot_opmask(vtype::eq(data, commonVec)))) {
             break;
         }
     }
-    
+
     // Than scalar at the end
-    for (; index <= right; index++){
-        if (arr[index] != commonValue){
+    for (; index <= right; index++) {
+        if (arr[index] != commonValue) {
             value1 = arr[index];
             break;
-        } 
+        }
     }
-    
-    if (index == right + 1){
+
+    if (index == right + 1) {
         // The array is completely constant
         // Setting the second flag to true skips partitioning, as the array is constant and thus sorted
         return pivot_results<type_t>(commonValue, pivot_result_t::Sorted);
     }
-    
+
     // Secondly, search for a second value not equal to either of the previous two
     // First vectorized
     reg_t value1Vec = vtype::set1(value1);
-    for (; index <= right - vtype::numlanes; index += vtype::numlanes){
+    for (; index <= right - vtype::numlanes; index += vtype::numlanes) {
         reg_t data = vtype::loadu(arr + index);
-        if (!vtype::all_false(vtype::knot_opmask(vtype::eq(data, commonVec))) && !vtype::all_false(vtype::knot_opmask(vtype::eq(data, value1Vec)))){
+        if (!vtype::all_false(vtype::knot_opmask(vtype::eq(data, commonVec)))
+            && !vtype::all_false(
+                    vtype::knot_opmask(vtype::eq(data, value1Vec)))) {
             break;
         }
     }
-    
+
     // Then scalar
-    for (; index <= right; index++){
-        if (arr[index] != commonValue && arr[index] != value1){
+    for (; index <= right; index++) {
+        if (arr[index] != commonValue && arr[index] != value1) {
             value2 = arr[index];
             break;
-        } 
+        }
     }
-    
-    if (index == right + 1){
+
+    if (index == right + 1) {
         // The array contains only 2 values
         // We must pick the larger one, else the right partition is empty
         // We can also skip recursing, as it is guaranteed both partitions are constant after partitioning with the larger value
         // TODO this logic now assumes we use greater than or equal to specifically when partitioning, might be worth noting that somewhere
         type_t pivot = std::max(value1, commonValue, comparison_func<vtype>);
         return pivot_results<type_t>(pivot, pivot_result_t::Only2Values);
     }
-    
+
     // The array has at least 3 distinct values. Use the middle one as the pivot
-    type_t median = std::max(std::min(value1,value2, comparison_func<vtype>), std::min(std::max(value1,value2, comparison_func<vtype>),commonValue, comparison_func<vtype>), comparison_func<vtype>);
+    type_t median = std::max(
+            std::min(value1, value2, comparison_func<vtype>),
+            std::min(std::max(value1, value2, comparison_func<vtype>),
+                     commonValue,
+                     comparison_func<vtype>),
+            comparison_func<vtype>);
     return pivot_results<type_t>(median);
 }
 
-#endif
+#endif

Original file line number	Diff line number	Diff line change
`@@ -441,9 +441,8 @@ bitonic_fullmerge_n_vec(typename keyType::reg_t *keys,`
`441`	`441`	`}`
`442`	`442`
`443`	`443`	`template <typename keyType, typename indexType, int numVecs>`
`444`		`-X86_SIMD_SORT_INLINE void argsort_n_vec(typename keyType::type_t *keys,`
`445`		`- arrsize_t *indices,`
`446`		`- int N)`
	`444`	`+X86_SIMD_SORT_INLINE void`
	`445`	`+argsort_n_vec(typename keyType::type_t *keys, arrsize_t *indices, int N)`
`447`	`446`	`{`
`448`	`447`	`using kreg_t = typename keyType::reg_t;`
`449`	`448`	`using ireg_t = typename indexType::reg_t;`
`@@ -586,9 +585,8 @@ X86_SIMD_SORT_INLINE void kvsort_n_vec(typename keyType::type_t *keys,`
`586`	`585`	`}`
`587`	`586`
`588`	`587`	`template <typename keyType, typename indexType, int maxN>`
`589`		`-X86_SIMD_SORT_INLINE void argsort_n(typename keyType::type_t *keys,`
`590`		`- arrsize_t *indices,`
`591`		`- int N)`
	`588`	`+X86_SIMD_SORT_INLINE void`
	`589`	`+argsort_n(typename keyType::type_t *keys, arrsize_t *indices, int N)`
`592`	`590`	`{`
`593`	`591`	`static_assert(keyType::numlanes == indexType::numlanes,`
`594`	`592`	`"invalid pairing of value/index types");`