Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Global Memory Kernel #4

Merged
merged 3 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/makefile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
- name: Build with Makefile
run: |
# Make sure the Makefile exists and then build the project
if [ -f makefile ]; then
if [ -f Makefile ]; then
make
else
echo "Makefile not found!"
Expand Down
10 changes: 8 additions & 2 deletions makefile → Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,17 @@ CUDA_PATH = /opt/cuda
INCLUDES = -I$(CUDA_PATH)/include
LDFLAGS = -L$(CUDA_PATH)/lib64 -lcudart

all: cpu_bitonic_sort warp_bitonic_sort smem_bitonic_sort
all: cpu_bitonic_sort warp_bitonic_sort smem_bitonic_sort global_bitonic_sort

warp_bitonic_sort: main.o warp_bitonic_sort.o
$(CXX) $^ -o $@ $(LDFLAGS)

smem_bitonic_sort: main.o smem_bitonic_sort.o
$(CXX) $^ -o $@ $(LDFLAGS)

global_bitonic_sort: main.o global_bitonic_sort.o
$(CXX) $^ -o $@ $(LDFLAGS)

cpu_bitonic_sort: cpu_bitonic_sort.cpp
$(CXX) $^ -o $@

Expand All @@ -26,5 +29,8 @@ warp_bitonic_sort.o: warp_bitonic_sort.cu bitonic_sort.cuh
smem_bitonic_sort.o: smem_bitonic_sort.cu bitonic_sort.cuh
$(NVCC) $(NVCCFLAGS) -c $< -o $@

global_bitonic_sort.o: global_bitonic_sort.cu bitonic_sort.cuh
$(NVCC) $(NVCCFLAGS) -c $< -o $@

clean:
rm -f *.o warp_bitonic_sort smem_bitonic_sort cpu_bitonic_sort
rm -f *.o warp_bitonic_sort smem_bitonic_sort cpu_bitonic_sort global_bitonic_sort
80 changes: 80 additions & 0 deletions global_bitonic_sort.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/**
* Global Memory Bitonic Sort
*
* This uses GPU global memory to sort long arrays of ints
*
* Author: Andrew Boessen
*/

#include "bitonic_sort.cuh"

#include <cstdio>

/**
 * Global Memory Bitonic Compare-and-Swap Kernel
 *
 * Executes one compare-exchange step of the bitonic network. Every thread
 * computes its global index, finds its partner by flipping one bit of that
 * index, and the lower-indexed thread of each pair orders the two elements
 * in place.
 *
 * @param i   current large step (stage) of the bitonic sequence; bit i of the
 *            thread index selects the ordering direction
 * @param j   current small step within the stage; partners differ in bit
 *            (i - j) of their index
 * @param arr array whose elements are compared and swapped in place
 *
 * @note The kernel takes no length argument and performs no bounds check, so
 *       every launched thread index (and its partner) must be a valid index
 *       into arr.
 */
__global__ void globalSwap(int i, int j, int *arr) {
  // global index of this thread within the grid
  const int self = blockIdx.x * blockDim.x + threadIdx.x;
  // matching element in the butterfly diagram: flip bit (i - j)
  const int partner = self ^ (1 << (i - j));

  // each pair is processed exactly once, by its lower-indexed thread
  if (partner <= self) {
    return;
  }

  // bit i set   -> larger value is moved to the lower index (descending run)
  // bit i clear -> smaller value is moved to the lower index (ascending run)
  const bool descending = (self & (1 << i)) != 0;

  const int lo = arr[self];
  const int hi = arr[partner];
  const bool outOfOrder = descending ? (lo < hi) : (lo > hi);
  if (outOfOrder) {
    arr[self] = hi;
    arr[partner] = lo;
  }
}

/**
 * Global Memory Bitonic Sort
 *
 * Drives the bitonic network from the host: for every stage i it issues one
 * globalSwap launch per step j = 1..i. All launches use the default stream,
 * so each step operates on the result of the previous one.
 *
 * @param arr Pointer to the array of integers to be sorted; forwarded
 *            unchanged to the globalSwap kernel
 * @param size Total number of elements in the array
 * @param block_size Number of threads in one block
 * @param num_blocks Number of total blocks in the grid
 *
 * @note This function assumes that the number of elements in the array is a
 *       power of two
 * @note globalSwap performs no bounds check, so num_blocks * block_size must
 *       not exceed size
 * @note Kernels are only launched here; this function does not wait for them
 *       to complete. If a launch is rejected, the error is reported on stderr
 *       and the remaining steps are skipped.
 *
 * @see globalSwap() for the element comparison and swapping logic kernel
 */
void globalBitonicSort(int *arr, int size, int block_size, int num_blocks) {
  // Stage 0 has no steps, so start at stage 1. The shift is done in 64 bits
  // so the loop bound cannot overflow int when size >= 2^30.
  for (int i = 1; (1LL << i) <= size; i++) {
    for (int j = 1; j <= i; j++) {
      globalSwap<<<num_blocks, block_size>>>(i, j, arr);

      // A bad launch configuration is only visible through the last-error
      // state; without this check the array would silently stay unsorted.
      const cudaError_t err = cudaGetLastError();
      if (err != cudaSuccess) {
        std::fprintf(stderr,
                     "globalBitonicSort: globalSwap launch failed "
                     "(i=%d, j=%d): %s\n",
                     i, j, cudaGetErrorString(err));
        return;
      }
    }
  }
}
/**
 * Launch Global Memory Bitonic Sort
 *
 * Picks a launch configuration for size elements and runs
 * globalBitonicSort() with it.
 *
 * @param arr  Pointer to the array of integers to be sorted; forwarded
 *             unchanged to the kernel launches (presumably a device pointer,
 *             confirm against the caller)
 * @param size Total number of elements in the array; assumed to be a power
 *             of two, as required by globalBitonicSort()
 */
void launchBitonicSort(int *arr, int size) {
  const int MAX_BLOCK_SIZE = 512;

  // zero or one element is already sorted; nothing to launch
  if (size < 2) {
    return;
  }

  // Never launch more threads than there are elements: globalSwap has no
  // bounds check, so a full 512-thread block on a shorter array would read
  // and write past its end.
  const int BLOCK_SIZE = size < MAX_BLOCK_SIZE ? size : MAX_BLOCK_SIZE;
  // ceil-div written so that it cannot overflow for size close to INT_MAX
  const int NUM_BLOCKS = size / BLOCK_SIZE + (size % BLOCK_SIZE != 0 ? 1 : 0);

  // call sort function
  globalBitonicSort(arr, size, BLOCK_SIZE, NUM_BLOCKS);
}