Skip to content

Commit 13246e2

Browse files
committed
Add src/x86simdsort-static-incl.h to include all static avx512/avx2 methods
1 parent 8cc37bd commit 13246e2

25 files changed

+176
-117
lines changed

examples/Makefile

+13-19
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,24 @@
1-
CXX ?= g++-12
2-
CFLAGS = -I../src -std=c++17 -O3 $(if $(CXXFLAGS),$(CXXFLAGS),)
3-
EXE = qsort32avx2 argsort kvsort qsortfp16 qsort16 qsort32 qsort64
1+
CXX ?= g++-13
2+
CFLAGS = -I../src -std=c++17 -O3
3+
EXE = kvsort qsortavx2 qsortavx512 qsortspr qsorticl
44

55
default: all
66
all : $(EXE)
77

8-
qsortfp16: avx512fp-16bit-qsort.cpp
9-
$(CXX) -o qsortfp16 -march=sapphirerapids $(CFLAGS) avx512fp-16bit-qsort.cpp
10-
11-
qsort16: avx512-16bit-qsort.cpp
12-
$(CXX) -o qsort16 -march=icelake-client $(CFLAGS) avx512-16bit-qsort.cpp
13-
14-
qsort32: avx512-32bit-qsort.cpp
15-
$(CXX) -o qsort32 -march=skylake-avx512 $(CFLAGS) avx512-32bit-qsort.cpp
8+
kvsort: avx512-kv.cpp
9+
$(CXX) -o kvsort -mavx512vl -mavx512dq $(CFLAGS) avx512-kv.cpp
1610

17-
qsort32avx2: avx2-32bit-qsort.cpp
18-
$(CXX) -o qsort32avx2 -march=haswell $(CFLAGS) avx2-32bit-qsort.cpp
11+
qsortavx512: skx-avx2.cpp
12+
$(CXX) -o qsortavx512 -mavx512vl -mavx512dq $(CFLAGS) skx-avx2.cpp
1913

20-
qsort64: avx512-64bit-qsort.cpp
21-
$(CXX) -o qsort64 -march=skylake-avx512 $(CFLAGS) avx512-64bit-qsort.cpp
14+
qsortavx2: skx-avx2.cpp
15+
$(CXX) -o qsortavx2 -mavx2 $(CFLAGS) skx-avx2.cpp
2216

23-
argsort: avx512-argsort.cpp
24-
$(CXX) -o argsort -march=skylake-avx512 $(CFLAGS) avx512-argsort.cpp
17+
qsorticl: icl-16bit.cpp
18+
$(CXX) -o qsorticl -mavx512vl -mavx512bw -mavx512dq -mavx512vbmi2 $(CFLAGS) icl-16bit.cpp
2519

26-
kvsort: avx512-kv.cpp
27-
$(CXX) -o kvsort -march=skylake-avx512 $(CFLAGS) avx512-kv.cpp
20+
qsortspr: spr-16bit.cpp
21+
$(CXX) -o qsortspr -mavx512vl -mavx512dq -mavx512vbmi2 -mavx512fp16 $(CFLAGS) spr-16bit.cpp
2822

2923
clean:
3024
$(RM) $(EXE)

examples/avx2-32bit-qsort.cpp

-11
This file was deleted.

examples/avx512-16bit-qsort.cpp

-11
This file was deleted.

examples/avx512-32bit-qsort.cpp

-11
This file was deleted.

examples/avx512-64bit-qsort.cpp

-11
This file was deleted.

examples/avx512-argsort.cpp

-10
This file was deleted.

examples/avx512-kv.cpp

+12-12
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#include "avx512-64bit-keyvaluesort.hpp"
1+
#include "x86simdsort-static-incl.h"
22

33
int main()
44
{
@@ -7,17 +7,17 @@ int main()
77
uint64_t arr2[size];
88
double arr3[size];
99
float arr4[size];
10-
avx512_qsort_kv(arr1, arr1, size);
11-
avx512_qsort_kv(arr1, arr2, size);
12-
avx512_qsort_kv(arr1, arr3, size);
13-
avx512_qsort_kv(arr2, arr1, size);
14-
avx512_qsort_kv(arr2, arr2, size);
15-
avx512_qsort_kv(arr2, arr3, size);
16-
avx512_qsort_kv(arr3, arr1, size);
17-
avx512_qsort_kv(arr3, arr2, size);
18-
avx512_qsort_kv(arr1, arr4, size);
19-
avx512_qsort_kv(arr2, arr4, size);
20-
avx512_qsort_kv(arr3, arr4, size);
10+
x86simdsortStatic::keyvalue_qsort(arr1, arr1, size);
11+
x86simdsortStatic::keyvalue_qsort(arr1, arr2, size);
12+
x86simdsortStatic::keyvalue_qsort(arr1, arr3, size);
13+
x86simdsortStatic::keyvalue_qsort(arr2, arr1, size);
14+
x86simdsortStatic::keyvalue_qsort(arr2, arr2, size);
15+
x86simdsortStatic::keyvalue_qsort(arr2, arr3, size);
16+
x86simdsortStatic::keyvalue_qsort(arr3, arr1, size);
17+
x86simdsortStatic::keyvalue_qsort(arr3, arr2, size);
18+
x86simdsortStatic::keyvalue_qsort(arr1, arr4, size);
19+
x86simdsortStatic::keyvalue_qsort(arr2, arr4, size);
20+
x86simdsortStatic::keyvalue_qsort(arr3, arr4, size);
2121
return 0;
2222
return 0;
2323
}

examples/avx512fp-16bit-qsort.cpp

-11
This file was deleted.

examples/icl-16bit.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#include "x86simdsort-static-incl.h"
2+
3+
// Example for 16-bit integer input (short): calls the x86simdsortStatic
// qsort, qselect and partial_qsort templates on a 1000-element array, passing
// 10 as the k argument of qselect/partial_qsort. hasnan is left at its
// declared default.
// NOTE(review): arr is never initialised before being sorted, so the values
// read are indeterminate; presumably this file only serves as a
// compile/instantiation check for the ICL flag set — confirm, or fill the
// array first.
int main()
4+
{
5+
const int size = 1000;
6+
short arr[size];
7+
x86simdsortStatic::qsort(arr, size);
8+
x86simdsortStatic::qselect(arr, 10, size);
9+
x86simdsortStatic::partial_qsort(arr, 10, size);
10+
return 0;
11+
}

examples/skx-avx2.cpp

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#include "x86simdsort-static-incl.h"
2+
3+
// Example for float and double input: calls qsort, qselect, partial_qsort,
// argsort and argselect from x86simdsortStatic on two 1000-element arrays,
// passing 10 as the k argument where one is required. The same source is
// built twice by examples/Makefile: once with -mavx512vl -mavx512dq
// (qsortavx512) and once with -mavx2 (qsortavx2).
// The arg1..arg4 results are stored but never read.
// NOTE(review): arrd and arrf are never initialised before being sorted, so
// the values read are indeterminate; presumably this is only a
// compile/instantiation check — confirm, or fill the arrays first.
int main()
4+
{
5+
const int size = 1000;
6+
double arrd[size];
7+
float arrf[size];
8+
x86simdsortStatic::qsort(arrf, size);
9+
x86simdsortStatic::qsort(arrd, size);
10+
x86simdsortStatic::qselect(arrf, 10, size);
11+
x86simdsortStatic::qselect(arrd, 10, size);
12+
x86simdsortStatic::partial_qsort(arrf, 10, size);
13+
x86simdsortStatic::partial_qsort(arrd, 10, size);
14+
auto arg1 = x86simdsortStatic::argsort(arrf, size);
15+
auto arg2 = x86simdsortStatic::argselect(arrf, 10, size);
16+
auto arg3 = x86simdsortStatic::argsort(arrd, size);
17+
auto arg4 = x86simdsortStatic::argselect(arrd, 10, size);
18+
return 0;
19+
}

examples/spr-16bit.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#include "x86simdsort-static-incl.h"
2+
3+
// Example for _Float16 input: calls the x86simdsortStatic qsort, qselect and
// partial_qsort templates on a 1000-element array, passing 10 as the k
// argument of qselect/partial_qsort. examples/Makefile builds this file with
// -mavx512vl -mavx512dq -mavx512vbmi2 -mavx512fp16.
// NOTE(review): arr is never initialised before being sorted, so the values
// read are indeterminate; presumably this is only a compile/instantiation
// check — confirm, or fill the array first.
int main()
4+
{
5+
const int size = 1000;
6+
_Float16 arr[size];
7+
x86simdsortStatic::qsort(arr, size);
8+
x86simdsortStatic::qselect(arr, 10, size);
9+
x86simdsortStatic::partial_qsort(arr, 10, size);
10+
return 0;
11+
}

lib/x86simdsort-avx2.cpp

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
// AVX2 specific routines:
2-
#include "avx2-32bit-qsort.hpp"
3-
#include "avx2-64bit-qsort.hpp"
4-
#include "avx2-32bit-half.hpp"
5-
#include "xss-common-argsort.h"
2+
#include "x86simdsort-static-incl.h"
63
#include "x86simdsort-internal.h"
74

85
#define DEFINE_ALL_METHODS(type) \

lib/x86simdsort-icl.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// ICL specific routines:
2-
#include "avx512-16bit-qsort.hpp"
2+
#include "x86simdsort-static-incl.h"
33
#include "x86simdsort-internal.h"
44

55
namespace xss {

lib/x86simdsort-skx.cpp

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
// SKX specific routines:
2-
#include "avx512-32bit-qsort.hpp"
3-
#include "avx512-64bit-keyvaluesort.hpp"
4-
#include "avx512-64bit-argsort.hpp"
5-
#include "avx512-64bit-qsort.hpp"
2+
#include "x86simdsort-static-incl.h"
63
#include "x86simdsort-internal.h"
74

85
#define DEFINE_ALL_METHODS(type) \

lib/x86simdsort-spr.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// SPR specific routines:
2-
#include "avx512fp16-16bit-qsort.hpp"
2+
#include "x86simdsort-static-incl.h"
33
#include "x86simdsort-internal.h"
44

55
namespace xss {

src/avx2-32bit-qsort.hpp

-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
#ifndef AVX2_QSORT_32BIT
88
#define AVX2_QSORT_32BIT
99

10-
#include "xss-common-qsort.h"
1110
#include "avx2-emu-funcs.hpp"
1211

1312
/*

src/avx2-64bit-qsort.hpp

-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
#ifndef AVX2_QSORT_64BIT
99
#define AVX2_QSORT_64BIT
1010

11-
#include "xss-common-qsort.h"
1211
#include "avx2-emu-funcs.hpp"
1312

1413
/*

src/avx2-emu-funcs.hpp

-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
#include <array>
55
#include <utility>
6-
#include "xss-common-qsort.h"
76

87
constexpr auto avx2_mask_helper_lut32 = [] {
98
std::array<std::array<int32_t, 8>, 256> lut {};

src/avx512-16bit-common.h

-2
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
#ifndef AVX512_16BIT_COMMON
88
#define AVX512_16BIT_COMMON
99

10-
#include "xss-common-qsort.h"
11-
1210
/*
1311
* Constants used in sorting 32 elements in a ZMM registers. Based on Bitonic
1412
* sorting network (see

src/avx512-32bit-qsort.hpp

-2
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
#ifndef AVX512_QSORT_32BIT
99
#define AVX512_QSORT_32BIT
1010

11-
#include "xss-common-qsort.h"
12-
1311
/*
1412
* Constants used in sorting 16 elements in a ZMM registers. Based on Bitonic
1513
* sorting network (see

src/avx512-64bit-keyvaluesort.hpp

-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
#ifndef AVX512_QSORT_64BIT_KV
99
#define AVX512_QSORT_64BIT_KV
1010

11-
#include "xss-common-qsort.h"
1211
#include "avx512-64bit-common.h"
1312
#include "xss-network-keyvaluesort.hpp"
1413

src/avx512-64bit-qsort.hpp

-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
#ifndef AVX512_QSORT_64BIT
88
#define AVX512_QSORT_64BIT
99

10-
#include "xss-common-qsort.h"
1110
#include "avx512-64bit-common.h"
1211

1312
#endif // AVX512_QSORT_64BIT

src/x86simdsort-static-incl.h

+106
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#ifndef X86_SIMD_SORT_STATIC_METHODS
2+
#define X86_SIMD_SORT_STATIC_METHODS
3+
#include <vector>
4+
#include <stdlib.h>
5+
// Declare all methods:
6+
// Forward declarations of the statically dispatched entry points. Only the
// declarations carry the default argument `hasnan = true`; the matching
// definitions are supplied further down in this header, selected by the
// instruction-set macros the translation unit is compiled with.
//
//   qsort(arr, size)                 - takes a pointer and an element count
//   qselect(arr, k, size)            - additionally takes an index k
//   partial_qsort(arr, k, size)      - additionally takes an index k
//   argsort(arr, size)               - returns a std::vector<size_t>
//   argselect(arr, k, size)          - returns a std::vector<size_t>
//   keyvalue_qsort(key, val, size)   - takes separate key and value pointers
//
// NOTE(review): X86_SIMD_SORT_FINLINE is used in these declarations, but the
// only includes visible before this point are <vector> and <stdlib.h>;
// confirm the macro is already defined when this header is included first.
// NOTE(review): argselect spells its specifiers as
// `std::vector<size_t> X86_SIMD_SORT_FINLINE`, the reverse of every other
// declaration here and of its own definition in XSS_METHODS — confirm the
// macro expands to something valid in both positions.
namespace x86simdsortStatic {
7+
template <typename T>
8+
X86_SIMD_SORT_FINLINE void qsort(T *arr, size_t size, bool hasnan = true);
9+
10+
template <typename T>
11+
X86_SIMD_SORT_FINLINE void
12+
qselect(T *arr, size_t k, size_t size, bool hasnan = true);
13+
14+
template <typename T>
15+
X86_SIMD_SORT_FINLINE void
16+
partial_qsort(T *arr, size_t k, size_t size, bool hasnan = true);
17+
18+
template <typename T>
19+
X86_SIMD_SORT_FINLINE std::vector<size_t>
20+
argsort(T *arr, size_t size, bool hasnan = true);
21+
22+
template <typename T>
23+
std::vector<size_t> X86_SIMD_SORT_FINLINE
24+
argselect(T *arr, size_t k, size_t size, bool hasnan = true);
25+
26+
template <typename T1, typename T2>
27+
X86_SIMD_SORT_FINLINE void
28+
keyvalue_qsort(T1 *key, T2 *val, size_t size, bool hasnan = true);
29+
} // namespace x86simdsortStatic
30+
31+
/*
 * XSS_METHODS(ISA): emits the out-of-line definitions of
 * x86simdsortStatic::qsort, qselect, partial_qsort, argsort and argselect.
 * Each definition forwards its arguments unchanged to the function whose
 * name is formed by token-pasting the ISA prefix, i.e. ISA##_qsort,
 * ISA##_qselect, ISA##_partial_qsort, ISA##_argsort and ISA##_argselect
 * (for example avx512_qsort when invoked as XSS_METHODS(avx512)).
 *
 * keyvalue_qsort is deliberately not part of this macro; it is defined
 * separately. The definitions repeat no default for `hasnan` because the
 * default is already given on the declarations.
 *
 * The macro defines each function once, so it must be expanded at most once
 * per translation unit.
 */
#define XSS_METHODS(ISA) \
32+
template <typename T> \
33+
X86_SIMD_SORT_FINLINE void x86simdsortStatic::qsort( \
34+
T *arr, size_t size, bool hasnan) \
35+
{ \
36+
ISA##_qsort(arr, size, hasnan); \
37+
} \
38+
template <typename T> \
39+
X86_SIMD_SORT_FINLINE void x86simdsortStatic::qselect( \
40+
T *arr, size_t k, size_t size, bool hasnan) \
41+
{ \
42+
ISA##_qselect(arr, k, size, hasnan); \
43+
} \
44+
template <typename T> \
45+
X86_SIMD_SORT_FINLINE void x86simdsortStatic::partial_qsort( \
46+
T *arr, size_t k, size_t size, bool hasnan) \
47+
{ \
48+
ISA##_partial_qsort(arr, k, size, hasnan); \
49+
} \
50+
template <typename T> \
51+
X86_SIMD_SORT_FINLINE std::vector<size_t> x86simdsortStatic::argsort( \
52+
T *arr, size_t size, bool hasnan) \
53+
{ \
54+
return ISA##_argsort(arr, size, hasnan); \
55+
} \
56+
template <typename T> \
57+
X86_SIMD_SORT_FINLINE std::vector<size_t> x86simdsortStatic::argselect( \
58+
T *arr, size_t k, size_t size, bool hasnan) \
59+
{ \
60+
return ISA##_argselect(arr, k, size, hasnan); \
61+
}
62+
63+
/*
64+
* qsort, qselect, partial_qsort, argsort and key-value sort template functions.
65+
*/
66+
#include "xss-common-qsort.h"
67+
#include "xss-common-argsort.h"
68+
69+
#if defined(__AVX512DQ__) && defined(__AVX512VL__)
70+
/* 32-bit and 64-bit dtypes vector definitions on SKX */
71+
#include "avx512-32bit-qsort.hpp"
72+
#include "avx512-64bit-qsort.hpp"
73+
#include "avx512-64bit-argsort.hpp"
74+
#include "avx512-64bit-keyvaluesort.hpp"
75+
76+
/* 16-bit dtypes vector definitions on ICL */
77+
#if defined(__AVX512BW__) && defined(__AVX512VBMI2__)
78+
#include "avx512-16bit-qsort.hpp"
79+
/* _Float16 vector definitions on SPR */
80+
#if defined(__FLT16_MAX__) && defined(__AVX512BW__) && defined(__AVX512FP16__)
81+
#include "avx512fp16-16bit-qsort.hpp"
82+
#endif // __FLT16_MAX__
83+
#endif // __AVX512VBMI2__
84+
85+
XSS_METHODS(avx512)
86+
87+
// key-value currently only on avx512
// Definition of x86simdsortStatic::keyvalue_qsort: forwards key, val, size
// and hasnan unchanged to avx512_qsort_kv. It sits inside the
// __AVX512DQ__ && __AVX512VL__ branch of this header; the AVX2 branch only
// expands XSS_METHODS(avx2), so keyvalue_qsort stays declared but undefined
// there.
88+
template <typename T1, typename T2>
89+
X86_SIMD_SORT_FINLINE void
90+
x86simdsortStatic::keyvalue_qsort(T1 *key, T2 *val, size_t size, bool hasnan)
91+
{
92+
avx512_qsort_kv(key, val, size, hasnan);
93+
}
94+
95+
#elif defined(__AVX2__) && !defined(__AVX512F__)
96+
/* 32-bit and 64-bit dtypes vector definitions on AVX2 */
97+
#include "avx2-32bit-half.hpp"
98+
#include "avx2-32bit-qsort.hpp"
99+
#include "avx2-64bit-qsort.hpp"
100+
XSS_METHODS(avx2)
101+
102+
#else
103+
#error "x86simdsortStatic methods needs to be compiled with avx512/avx2 specific flags"
104+
#endif // (__AVX512VL__ && __AVX512DQ__) || AVX2
105+
106+
#endif // X86_SIMD_SORT_STATIC_METHODS

0 commit comments

Comments
 (0)