Skip to content

Sort #87

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open

Sort #87

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[submodule "cmake-common"]
path = cmake-common
url = https://github.com/SacBase/cmake-common.git
[submodule "src/algorithms/cccl"]
path = src/algorithms/cccl
url = https://github.com/NVIDIA/cccl/
1 change: 1 addition & 0 deletions cmake/sac-core-ext.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,4 @@ auxiliary/Interval.sac Ext
auxiliary/Hiding.sac Ext
auxiliary/C99Benchmarking.sac Ext
auxiliary/Benchmarking.sac Ext
algorithms/Sort.sac Core
94 changes: 92 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,16 @@ SET (C_DEPS_SRC
auxiliary/src/C99Benchmarking/bench.c
)

# C++ helper sources, given relative to this CMakeLists.txt.
# Each entry is compiled to an object file by the FOREACH loop over
# CXX_DEPS_SRC further down in this file.
SET (CXX_DEPS_SRC
algorithms/src/sort_cpu.cpp
)

# CUDA helper sources, given relative to this CMakeLists.txt.
# Each entry is compiled with nvcc by the FOREACH loop over CUDA_DEPS_SRC
# further down in this file; that loop only runs for targets whose name
# starts with "cuda".
SET (CUDA_DEPS_SRC
algorithms/src/sort_gpu.cu
)

# Read the list of sac sources from sac-core-ext.txt
PARSE_CORE_EXT_CONFIG (
"${CMAKE_SOURCE_DIR}/cmake/sac-core-ext.txt"
Expand Down Expand Up @@ -375,6 +385,77 @@ FOREACH (name ${C_DEPS_SRC})
)
ENDFOREACH (name)

# For every C++ source, compile an object file maintaining the right location
# in the binary dir so that sac files can pick it up.
#
# The objects are produced by custom commands (not by a CMake library target)
# because the SaC modules reference them by path through `#pragma linkobj'.
# The sources include Thrust headers from the CCCL checkout that lives in
# algorithms/cccl, so that checkout and a C++ compiler must both be present.

# Fail at configure time with a readable message instead of producing a
# build rule whose command line starts with an empty compiler name.
IF (NOT CMAKE_CXX_COMPILER)
    MESSAGE (FATAL_ERROR
        "A C++ compiler is required to build ${CXX_DEPS_SRC}, but "
        "CMAKE_CXX_COMPILER is not set; enable the CXX language.")
ENDIF ()

# An uninitialised git submodule leaves an empty directory behind, which
# would otherwise only surface as a missing-header error during the build.
SET (cccl_dir "${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl")
IF (NOT EXISTS "${cccl_dir}/thrust")
    MESSAGE (FATAL_ERROR
        "Thrust headers not found in ${cccl_dir}; "
        "run `git submodule update --init --recursive'.")
ENDIF ()

# Host-specific tuning makes the resulting objects unusable on older CPUs,
# so let packagers switch it off.  The default keeps the previous behaviour.
OPTION (SAC_STDLIB_NATIVE_ARCH
    "Compile the C++ helpers with -march=native -mtune=native" ON)
IF (SAC_STDLIB_NATIVE_ARCH)
    SET (cxx_deps_arch_flags -march=native -mtune=native)
ELSE ()
    SET (cxx_deps_arch_flags)
ENDIF ()

FOREACH (name ${CXX_DEPS_SRC})
    SET (src "${CMAKE_CURRENT_SOURCE_DIR}/${name}")

    GET_FILENAME_COMPONENT (dir "${name}" DIRECTORY)

    GET_FILENAME_COMPONENT (dst "${name}" NAME_WE)
    SET (dst "${CMAKE_CURRENT_BINARY_DIR}/${dir}/${dst}${OBJEXT}")

    # Make sure that we put the object file in the same location where
    # the source file was.
    FILE (MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${dir}")

    MESSAGE (STATUS "Compiling ${dst}")
    ADD_CUSTOM_COMMAND (
        OUTPUT "${dst}"
        MAIN_DEPENDENCY "${src}"
        # The source is C++, so ask for the C++ dependency scanner.
        IMPLICIT_DEPENDS CXX "${src}"
        COMMAND
            ${CMAKE_CXX_COMPILER}
                "-I${CMAKE_CURRENT_SOURCE_DIR}/${dir}"
                "-I${CMAKE_CURRENT_BINARY_DIR}/${dir}"
                "-I${cccl_dir}/thrust"
                "-I${cccl_dir}/libcudacxx/include"
                "-I${cccl_dir}/cub"
                -O3
                ${cxx_deps_arch_flags}
                -fPIC
                -c "${src}"
                -o "${dst}"
        WORKING_DIRECTORY
            "${CMAKE_CURRENT_BINARY_DIR}/${dir}"
        COMMENT "Generating ${dst} for target `${TARGET}'"
        # Pass every argument to the compiler exactly as written above,
        # independent of the native build tool's quoting rules.
        VERBATIM
    )
ENDFOREACH (name)

# For CUDA targets only: compile every CUDA source with nvcc into an object
# file that mirrors the source location inside the binary dir, so that sac
# files can pick it up through `#pragma linkobj'.
IF ("${TARGET}" MATCHES "^cuda.*")
    # Resolve the CUDA compiler once instead of relying on a bare `nvcc'
    # being on PATH at build time.  Preference order: the compiler CMake
    # already knows about, then whatever find_program locates, and finally
    # the plain name so that previously working setups keep working.
    IF (CMAKE_CUDA_COMPILER)
        SET (nvcc_cmd "${CMAKE_CUDA_COMPILER}")
    ELSE ()
        FIND_PROGRAM (NVCC_EXECUTABLE nvcc)
        IF (NVCC_EXECUTABLE)
            SET (nvcc_cmd "${NVCC_EXECUTABLE}")
        ELSE ()
            MESSAGE (WARNING
                "nvcc was not found while configuring target `${TARGET}'; "
                "the build will try to run `nvcc' from PATH.")
            SET (nvcc_cmd nvcc)
        ENDIF ()
    ENDIF ()

    FOREACH (name ${CUDA_DEPS_SRC})
        SET (src "${CMAKE_CURRENT_SOURCE_DIR}/${name}")

        GET_FILENAME_COMPONENT (dir "${name}" DIRECTORY)

        GET_FILENAME_COMPONENT (dst "${name}" NAME_WE)
        SET (dst "${CMAKE_CURRENT_BINARY_DIR}/${dir}/${dst}${OBJEXT}")

        MESSAGE (STATUS "Compiling ${dst}")
        # Make sure that we put the object file in the same location where
        # the source file was.
        FILE (MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${dir}")

        ADD_CUSTOM_COMMAND (
            OUTPUT "${dst}"
            MAIN_DEPENDENCY "${src}"
            # CUDA sources are C++-based, so use the C++ include scanner.
            IMPLICIT_DEPENDS CXX "${src}"
            COMMAND
                ${nvcc_cmd}
                    "-I${CMAKE_CURRENT_SOURCE_DIR}/${dir}"
                    "-I${CMAKE_CURRENT_BINARY_DIR}/${dir}"
                    "-I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/thrust"
                    "-I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/libcudacxx/include"
                    "-I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/cub"
                    -O3
                    # -fPIC is a host-compiler flag and has to be forwarded.
                    --compiler-options -fPIC
                    -c "${src}"
                    -o "${dst}"
            WORKING_DIRECTORY
                "${CMAKE_CURRENT_BINARY_DIR}/${dir}"
            COMMENT "Generating ${dst} for target `${TARGET}'"
            # Pass every argument to nvcc exactly as written above.
            VERBATIM
        )
    ENDFOREACH (name)
ENDIF ()

# Make a directory for sac2c output
FILE (MAKE_DIRECTORY "${DLL_BUILD_DIR}/${TARGET_ENV}/${SBI}")
Expand Down Expand Up @@ -438,7 +519,12 @@ FOREACH (name ${SAC_SRC})
ADD_CUSTOM_COMMAND (
OUTPUT "${mod}" "${tree}"
COMMAND
${SAC2C} -v0 -linksetsize ${LINKSETSIZE} ${NOTREE_FLAG} -o ${DLL_BUILD_DIR} "${src}"
${SAC2C} -v0
-linksetsize ${LINKSETSIZE}
-Xl -lstdc++
${NOTREE_FLAG}
-o ${DLL_BUILD_DIR}
"${src}"
WORKING_DIRECTORY
"${dir}"
MAIN_DEPENDENCY "${src}"
Expand Down Expand Up @@ -515,7 +601,11 @@ FOREACH (name ${XSAC_SRC})
ADD_CUSTOM_COMMAND (
OUTPUT "${mod}" "${tree}"
COMMAND
${SAC2C} -v0 -linksetsize ${LINKSETSIZE} -o ${DLL_BUILD_DIR} "${dir}/${dst}.sac"
${SAC2C} -v0
-linksetsize ${LINKSETSIZE}
-Xl -lstdc++
-o ${DLL_BUILD_DIR}
"${dir}/${dst}.sac"
WORKING_DIRECTORY
"${dir}"
MAIN_DEPENDENCY "${dir}/${dst}.sac"
Expand Down
7 changes: 7 additions & 0 deletions src/algorithms/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
This directory is for functions that are not practical to implement in SaC.
This can happen for two reasons:

1. The optimal algorithm for computing the function depends on the backend.
2. The function is too difficult to implement in SaC.

Currently only a sorting function is included, for reason 1.
30 changes: 30 additions & 0 deletions src/algorithms/Sort.sac
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
module Sort;
export {Sort};

/*
 * Sort(keys) builds the index vector [0, 1, ..., n-1] and hands it, together
 * with keys and the length n, to the external routine TrueSort.  The value
 * returned by TrueSort is returned unchanged.
 *
 * The C implementations shipped with this module call thrust::sort_by_key
 * with keys as the sort keys and the index vector as the values, so the
 * result is the index vector after it has been permuted along with the keys.
 */
int[n] Sort(double[n] keys)
{
/* TODO inefficient for the GPU backend as iota(n) will be created
on the device, sent back to the host, and then back to the device
again. Still 10x faster on a RTX 1650 than on a Ryzen 4600H. */
iota = with {
([0] <= [i] < [n]): i;
}: genarray([n], 0);
return TrueSort(keys, iota, n);
}

/*
 * Binding to the C symbol MySortDouble(double *keys, int *indices, int n).
 *
 * indices must be initialised to iota(n).
 *
 * The linksign pragma maps both the return value and the indices argument to
 * C parameter 2, i.e. indices is updated in place and handed back as the
 * result; keys and n are plain inputs at positions 1 and 3.
 * NOTE(review): keys is not declared in/out here, so the C side presumably
 * has to leave the keys buffer unmodified -- confirm against the
 * implementations in src/.
 *
 * The object file to link is selected by the compilation target below: the
 * cuda target links sort_gpu.o and additionally carries a gpumem pragma;
 * every other branch currently links sort_cpu.o.
 */
external int[n] TrueSort(double[n] keys, int[n] indices, int n);
#pragma linkname "MySortDouble"
#pragma linksign [2, 1, 2, 3]
#if defined(SAC_TARGET_cuda)
#pragma linkobj "src/sort_gpu.o"
#pragma gpumem [0, 1, 2]
#elif defined(SAC_TARGET_default_sbi)
#pragma linkobj "src/sort_cpu.o"
#elif defined(SAC_TARGET_mt_pth)
/* TODO: make a multithreaded version work here. Thrust clashes
with our private heap manager. */
#pragma linkobj "src/sort_cpu.o"
#else
#pragma linkobj "src/sort_cpu.o"
#endif
1 change: 1 addition & 0 deletions src/algorithms/cccl
Submodule cccl added at 216584
10 changes: 10 additions & 0 deletions src/algorithms/src/sort_cpu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#define THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_CPP
#include <thrust/execution_policy.h>
#include <thrust/sort.h>

#include <vector>

extern "C" {
/**
 * Host implementation of the sort primitive used by the SaC module Sort.
 *
 * Reorders `indices` the same way a key sort of `keys` would reorder the
 * keys, using thrust::sort_by_key with its default comparison.
 *
 * @param keys     n sort keys; read only, left untouched.
 * @param indices  n values, updated in place; the SaC side passes 0..n-1.
 * @param n        number of elements in both arrays.
 *
 * thrust::sort_by_key sorts its key range in place.  The SaC declaration of
 * this function only marks `indices` as an in/out argument, so sorting the
 * caller's `keys` buffer directly would silently reorder an array that SaC
 * still considers unchanged.  The keys are therefore sorted in a private
 * copy.
 */
void MySortDouble(double *keys, int *indices, int n)
{
    /* Zero or one element: nothing to reorder, and this also keeps a
       negative n from forming an invalid range. */
    if (n <= 1) {
        return;
    }

    std::vector<double> scratch(keys, keys + n);
    thrust::sort_by_key(thrust::host, scratch.begin(), scratch.end(), indices);
}
}
9 changes: 9 additions & 0 deletions src/algorithms/src/sort_gpu.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/sort.h>

extern "C" {
/**
 * Device implementation of the sort primitive used by the SaC module Sort.
 *
 * Reorders `indices` the same way a key sort of `keys` would reorder the
 * keys, using thrust::sort_by_key with the thrust::device policy and its
 * default comparison.
 *
 * @param keys     n sort keys; read only, left untouched.
 * @param indices  n values, updated in place; the SaC side passes 0..n-1.
 * @param n        number of elements in both arrays.
 *
 * Both pointers are used with the thrust::device execution policy, i.e. they
 * are expected to refer to device memory (the SaC declaration presumably
 * arranges this through its gpumem pragma -- confirm).
 *
 * thrust::sort_by_key sorts its key range in place.  The SaC declaration of
 * this function only marks `indices` as an in/out argument, so sorting the
 * caller's `keys` buffer directly would silently reorder an array that SaC
 * still considers unchanged.  The keys are therefore sorted in a private
 * device-side copy.
 */
void MySortDouble(double *keys, int *indices, int n)
{
    /* Zero or one element: nothing to reorder, and this also keeps a
       negative n from forming an invalid range. */
    if (n <= 1) {
        return;
    }

    /* Wrapping the raw pointer tells Thrust that the source range lives on
       the device, so the copy below is device-to-device. */
    thrust::device_ptr<double> first(keys);
    thrust::device_vector<double> scratch(first, first + n);

    double *scratch_keys = thrust::raw_pointer_cast(scratch.data());
    thrust::sort_by_key(thrust::device, scratch_keys, scratch_keys + n, indices);
}
}