Skip to content

Commit af30caa

Browse files
sterrettm2 authored and r-devulap committed
Fixed bug in avx2-64bit logic and cleaned up some special case handling
1 parent 88edcf7 commit af30caa

File tree

3 files changed

+13
-18
lines changed

3 files changed

+13
-18
lines changed

src/avx2-64bit-qsort.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ struct avx2_vector<int64_t> {
7070
} // TODO: this should broadcast bits as is?
7171
static opmask_t knot_opmask(opmask_t x)
7272
{
73-
auto allTrue = _mm256_set1_epi64x(0xFFFF'FFFF);
73+
auto allTrue = _mm256_set1_epi64x(0xFFFF'FFFF'FFFF'FFFF);
7474
return _mm256_xor_si256(x, allTrue);
7575
}
7676
static opmask_t get_partial_loadmask(uint64_t num_to_read)
@@ -249,7 +249,7 @@ struct avx2_vector<uint64_t> {
249249
}
250250
static opmask_t knot_opmask(opmask_t x)
251251
{
252-
auto allTrue = _mm256_set1_epi64x(0xFFFF'FFFF);
252+
auto allTrue = _mm256_set1_epi64x(0xFFFF'FFFF'FFFF'FFFF);
253253
return _mm256_xor_si256(x, allTrue);
254254
}
255255
static opmask_t get_partial_loadmask(uint64_t num_to_read)
@@ -439,7 +439,7 @@ struct avx2_vector<double> {
439439
}
440440
static opmask_t knot_opmask(opmask_t x)
441441
{
442-
auto allTrue = _mm256_set1_epi64x(0xFFFF'FFFF);
442+
auto allTrue = _mm256_set1_epi64x(0xFFFF'FFFF'FFFF'FFFF);
443443
return _mm256_xor_si256(x, allTrue);
444444
}
445445
static opmask_t get_partial_loadmask(uint64_t num_to_read)

src/xss-common-qsort.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ qsort_(type_t *arr, arrsize_t left, arrsize_t right, arrsize_t max_iters)
502502
auto pivot_result = get_pivot_smart<vtype, type_t>(arr, left, right);
503503
type_t pivot = pivot_result.pivot;
504504

505-
if (pivot_result.alreadySorted){
505+
if (pivot_result.result == pivot_result_t::Sorted){
506506
return;
507507
}
508508

@@ -513,7 +513,7 @@ qsort_(type_t *arr, arrsize_t left, arrsize_t right, arrsize_t max_iters)
513513
= partition_avx512_unrolled<vtype, vtype::partition_unroll_factor>(
514514
arr, left, right + 1, pivot, &smallest, &biggest);
515515

516-
if (pivot_result.only2Values){
516+
if (pivot_result.result == pivot_result_t::Only2Values){
517517
return;
518518
}
519519

src/xss-pivot-selection.hpp

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,17 @@
33

44
#include "xss-network-qsort.hpp"
55

6+
enum class pivot_result_t : int { Normal, Sorted, Only2Values };
7+
68
template <typename type_t>
79
struct pivot_results{
8-
bool alreadySorted = false;
9-
bool only2Values = false;
10-
type_t pivot = 0;
1110

12-
pivot_results(type_t _pivot){
13-
pivot = _pivot;
14-
alreadySorted = false;
15-
}
11+
pivot_result_t result = pivot_result_t::Normal;
12+
type_t pivot = 0;
1613

17-
pivot_results(type_t _pivot, bool _alreadySorted){
14+
pivot_results(type_t _pivot, pivot_result_t _result = pivot_result_t::Normal){
1815
pivot = _pivot;
19-
alreadySorted = _alreadySorted;
16+
result = _result;
2017
}
2118
};
2219

@@ -197,7 +194,7 @@ X86_SIMD_SORT_INLINE pivot_results<type_t> get_pivot_near_constant(type_t *arr,
197194
if (index == right + 1){
198195
// The array is completely constant
199196
// Setting the second flag to true skips partitioning, as the array is constant and thus sorted
200-
return pivot_results<type_t>(commonValue, true);
197+
return pivot_results<type_t>(commonValue, pivot_result_t::Sorted);
201198
}
202199

203200
// Secondly, search for a second value not equal to either of the previous two
@@ -224,9 +221,7 @@ X86_SIMD_SORT_INLINE pivot_results<type_t> get_pivot_near_constant(type_t *arr,
224221
// We can also skip recursing, as it is guaranteed both partitions are constant after partitioning with the larger value
225222
// TODO this logic now assumes we use greater than or equal to specifically when partitioning, might be worth noting that somewhere
226223
type_t pivot = std::max(value1, commonValue, comparison_func<vtype>);
227-
auto result = pivot_results<type_t>(pivot, false);
228-
result.only2Values = true;
229-
return result;
224+
return pivot_results<type_t>(pivot, pivot_result_t::Only2Values);
230225
}
231226

232227
// The array has at least 3 distinct values. Use the middle one as the pivot

0 commit comments

Comments
 (0)