Skip to content

Commit 71a9cfa

Browse files
committed
Adds OpenMP support for kv-sort
1 parent 2315766 commit 71a9cfa

File tree

6 files changed

+67
-10
lines changed

6 files changed

+67
-10
lines changed

Makefile

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
test:
2-
meson setup -Dbuild_tests=true --warnlevel 2 --werror --buildtype release builddir
2+
meson setup -Dbuild_tests=true -Duse_openmp=false --warnlevel 2 --werror --buildtype release builddir
3+
cd builddir && ninja
4+
5+
test_openmp:
6+
meson setup -Dbuild_tests=true -Duse_openmp=true --warnlevel 2 --werror --buildtype release builddir
37
cd builddir && ninja
48

59
bench:

lib/meson.build

+8-2
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
11
libtargets = []
22

3+
# Add compile flags for OpenMP if enabled
4+
openmpflags = []
5+
if get_option('use_openmp')
6+
openmpflags = ['-DXSS_USE_OPENMP=true', '-fopenmp']
7+
endif
8+
39
if cpp.has_argument('-march=haswell')
410
libtargets += static_library('libavx',
511
files(
612
'x86simdsort-avx2.cpp',
713
),
814
include_directories : [src],
9-
cpp_args : ['-march=haswell'],
15+
cpp_args : ['-march=haswell', openmpflags],
1016
gnu_symbol_visibility : 'inlineshidden',
1117
)
1218
endif
@@ -17,7 +23,7 @@ if cpp.has_argument('-march=skylake-avx512')
1723
'x86simdsort-skx.cpp',
1824
),
1925
include_directories : [src],
20-
cpp_args : ['-march=skylake-avx512'],
26+
cpp_args : ['-march=skylake-avx512', openmpflags],
2127
gnu_symbol_visibility : 'inlineshidden',
2228
)
2329
endif

meson.build

+1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ subdir('lib')
3737
libsimdsort = shared_library('x86simdsortcpp',
3838
'lib/x86simdsort.cpp',
3939
include_directories : [src, utils, lib],
40+
link_args : [openmpflags],
4041
link_with : [libtargets],
4142
gnu_symbol_visibility : 'inlineshidden',
4243
install : true,

meson_options.txt

+4-2
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,7 @@ option('build_benchmarks', type : 'boolean', value : false,
44
description : 'Build benchmarking suite (default: "false").')
55
option('build_ippbench', type : 'boolean', value : false,
66
description : 'Add IPP sort to benchmarks (default: "false").')
7-
option('build_vqsortbench', type : 'boolean', value : false,
8-
description : 'Add google vqsort to benchmarks (default: "false").')
7+
option('build_vqsortbench', type : 'boolean', value : true,
8+
description : 'Add google vqsort to benchmarks (default: "true").')
9+
option('use_openmp', type : 'boolean', value : false,
10+
description : 'Use OpenMP to accelerate key-value sort (default: "false").')

scripts/branch-compare.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ build_branch() {
2727
fi
2828
fi
2929
cd $dir_name
30-
meson setup -Dbuild_benchmarks=true --warnlevel 0 --buildtype release builddir
30+
meson setup -Dbuild_benchmarks=true -Duse_openmp=true --warnlevel 0 --buildtype release builddir
3131
cd builddir
3232
ninja
3333
cd ../../

src/xss-common-keyvaluesort.hpp

+48-4
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,8 @@ X86_SIMD_SORT_INLINE void kvsort_(type1_t *keys,
366366
type2_t *indexes,
367367
arrsize_t left,
368368
arrsize_t right,
369-
int max_iters)
369+
int max_iters,
370+
arrsize_t task_threshold)
370371
{
371372
/*
372373
* Resort to std::sort if quicksort isn't making any progress
@@ -391,14 +392,44 @@ X86_SIMD_SORT_INLINE void kvsort_(type1_t *keys,
391392
type1_t biggest = vtype1::type_min();
392393
arrsize_t pivot_index = kvpartition_unrolled<vtype1, vtype2, 4>(
393394
keys, indexes, left, right + 1, pivot, &smallest, &biggest);
395+
396+
397+
#if defined(XSS_USE_OPENMP) && defined(_OPENMP)
398+
if (pivot != smallest) {
399+
bool parallelLeft = (pivot_index - left) > task_threshold;
400+
if (parallelLeft){
401+
#pragma omp task if(parallelLeft)
402+
kvsort_<vtype1, vtype2>(
403+
keys, indexes, left, pivot_index - 1, max_iters - 1, task_threshold);
404+
}else{
405+
kvsort_<vtype1, vtype2>(
406+
keys, indexes, left, pivot_index - 1, max_iters - 1, task_threshold);
407+
}
408+
}
409+
if (pivot != biggest) {
410+
bool parallelRight = (right - pivot_index) > task_threshold;
411+
412+
if (parallelRight){
413+
#pragma omp task if(parallelRight)
414+
kvsort_<vtype1, vtype2>(
415+
keys, indexes, pivot_index, right, max_iters - 1, task_threshold);
416+
}else{
417+
kvsort_<vtype1, vtype2>(
418+
keys, indexes, pivot_index, right, max_iters - 1, task_threshold);
419+
}
420+
}
421+
#else
422+
UNUSED(task_threshold);
423+
394424
if (pivot != smallest) {
395425
kvsort_<vtype1, vtype2>(
396-
keys, indexes, left, pivot_index - 1, max_iters - 1);
426+
keys, indexes, left, pivot_index - 1, max_iters - 1, 0);
397427
}
398428
if (pivot != biggest) {
399429
kvsort_<vtype1, vtype2>(
400-
keys, indexes, pivot_index, right, max_iters - 1);
430+
keys, indexes, pivot_index, right, max_iters - 1, 0);
401431
}
432+
#endif
402433
}
403434

404435
template <typename vtype1,
@@ -486,7 +517,20 @@ X86_SIMD_SORT_INLINE void xss_qsort_kv(
486517
UNUSED(hasnan);
487518
}
488519

489-
kvsort_<keytype, valtype>(keys, indexes, 0, arrsize - 1, maxiters);
520+
#if defined(XSS_USE_OPENMP) && defined(_OPENMP)
521+
bool useParallel = arrsize > 10000;
522+
arrsize_t taskThreshold = std::max((arrsize_t) 10000, arrsize / 100);
523+
if (useParallel){
524+
#pragma omp parallel
525+
#pragma omp single
526+
kvsort_<keytype, valtype>(keys, indexes, 0, arrsize - 1, maxiters, taskThreshold);
527+
}else{
528+
kvsort_<keytype, valtype>(keys, indexes, 0, arrsize - 1, maxiters, taskThreshold);
529+
}
530+
#else
531+
kvsort_<keytype, valtype>(keys, indexes, 0, arrsize - 1, maxiters, 0);
532+
#endif
533+
490534
replace_inf_with_nan(keys, arrsize, nan_count);
491535

492536
if (descending) {

0 commit comments

Comments
 (0)