-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwarp_bitonic_sort.cu
82 lines (76 loc) · 2.37 KB
/
warp_bitonic_sort.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/**
* Warp Bitoic Sort
*
* This uses warp shuffle to sort integers in a warp with bitonic sort
*
* Author: Andrew Boessen
*/
#include "bitonic_sort.cuh"
/**
* Swap
*
* This is used for swapping elements in bitonic sorting
*
* @param x caller line id's value
* @param mask source lane id = caller line id ^ mask
* @param dir direction to swap
*
* @return min or max of source and caller
*/
__device__ int swap(int x, int mask, int dir) {
// get correspondin element to x in butterfly diagram
int y = __shfl_xor_sync(0xffffffff, x, mask);
// return smaller or larger value based on direction of swap
return x < y == dir ? y : x;
}
/**
* Warp Bitonic Sort
*
* This function performs a bitonic sort on integers within a warp using warp
* shuffle operations. It sorts a portion of the input array corresponding to
* the calling thread's warp.
*
* The function uses the butterfly network pattern of bitonic sort, leveraging
* CUDA's warp-level primitives for efficient sorting within a warp (32
* threads).
*
* @param arr Pointer to the array of integers to be sorted
* @param size Total number of elements in the array
*
* @note This function assumes that the number of threads per block is at least
* equal to the warp size. Elements beyond the array size are padded with
* INT_MAX.
*
* @see swap() for the element comparison and swapping logic
*/
__global__ void warpBitonicSort(int *arr, int size) {
// threadIdx.x % 32
int lane_id = threadIdx.x & 0x1f;
int thread_id = threadIdx.x + blockIdx.x * blockDim.x;
// seed x with value from array
int x = thread_id < size ? arr[thread_id] : INT_MAX;
// make bitonic sequence and sort
for (int i = 0; (1 << i) <= warpSize; i++) {
for (int j = 0; j <= i; j++) {
// distance between caller and source lanes
int mask = 1 << (i - j);
// direction to swap caller and source lanes
int dir;
// only alternate direction when forming bitonic sequence
if (1 << i == warpSize) {
dir = (lane_id >> (i - j)) & 1;
} else {
dir = (lane_id >> (i + 1)) & 1 ^ (lane_id >> (i - j)) & 1;
}
x = swap(x, mask, dir);
}
}
// update value in array with sorted value
if (thread_id < size) {
arr[thread_id] = x;
}
}
void launchBitonicSort(int *arr, int size) {
const int BLOCK_SIZE = 512;
warpBitonicSort<<<size / BLOCK_SIZE, BLOCK_SIZE>>>(arr, size);
}