Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release v1.1.0 #56

Merged
merged 6 commits into from
Mar 21, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 21 additions & 16 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -8,7 +8,7 @@ jobs:
#########################
CPU:
# The type of runner that the job will run on
runs-on: ubuntu-18.04
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
@@ -33,7 +33,7 @@ jobs:

steps:
# Checks-out your repository under $GITHUB_WORKSPACE
- uses: actions/checkout@v2
- uses: actions/checkout@v4


- name: Print build config
@@ -43,8 +43,7 @@ jobs:
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y libfftw3-dev make g++ clang wget git make
cd ${HOME} && wget https://github.com/Kitware/CMake/releases/download/v3.11.4/cmake-3.11.4-Linux-x86_64.tar.gz && tar -xzvf cmake-3.11.4-Linux-x86_64.tar.gz
sudo apt-get install -y libfftw3-dev make g++ clang wget git make cmake
- name: Install MPI
if: ${{ matrix.use_mpi }}
@@ -57,7 +56,7 @@ jobs:
cd build
mkdir -p install_dir
export INSTALL_DIR=$(pwd)/install_dir
CXX=${COMPILER} ${HOME}/cmake-3.11.4-Linux-x86_64/bin/cmake .. -DSPFFT_BUILD_TESTS=OFF -DSPFFT_OMP=${USE_OMP} -DSPFFT_MPI=${USE_MPI} -DSPFFT_SINGLE_PRECISION=${USE_FLOAT} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR}
CXX=${COMPILER} cmake .. -DSPFFT_BUILD_TESTS=OFF -DSPFFT_OMP=${USE_OMP} -DSPFFT_MPI=${USE_MPI} -DSPFFT_SINGLE_PRECISION=${USE_FLOAT} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR}
make -j2
make VERBOSE=1 install
test -f ${INSTALL_DIR}/lib/libspfft.so
@@ -70,7 +69,7 @@ jobs:
rm -rf build
mkdir -p build
cd build
CXX=${COMPILER} ${HOME}/cmake-3.11.4-Linux-x86_64/bin/cmake .. -DSPFFT_BUILD_TESTS=ON -DSPFFT_OMP=${USE_OMP} -DSPFFT_MPI=${USE_MPI} -DSPFFT_SINGLE_PRECISION=${USE_FLOAT} -DCMAKE_BUILD_TYPE=${BUILD_TYPE}
CXX=${COMPILER} cmake .. -DSPFFT_BUILD_TESTS=ON -DSPFFT_OMP=${USE_OMP} -DSPFFT_MPI=${USE_MPI} -DSPFFT_SINGLE_PRECISION=${USE_FLOAT} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_CXX_FLAGS="-Wno-error=maybe-uninitialized"
make -j2
- name: Run tests
@@ -89,45 +88,51 @@ jobs:
# Build with CUDA
#################
CUDA:
runs-on: ubuntu-18.04
container: nvidia/cuda:9.2-devel-ubuntu18.04
runs-on: ubuntu-22.04
container: nvidia/cuda:11.0.3-devel-ubuntu20.04

steps:
# Checks-out your repository under $GITHUB_WORKSPACE
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- name: Install dependencies
run: |
apt-get update
apt-get install -y libfftw3-dev make g++ mpi-default-dev wget git make
cd ${HOME} && wget https://github.com/Kitware/CMake/releases/download/v3.14.6/cmake-3.14.6-Linux-x86_64.tar.gz && tar -xzvf cmake-3.14.6-Linux-x86_64.tar.gz
DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get install -y libfftw3-dev make g++ mpi-default-dev wget git make
cd ${HOME} && wget https://github.com/Kitware/CMake/releases/download/v3.18.0/cmake-3.18.0-Linux-x86_64.tar.gz && tar -xzvf cmake-3.18.0-Linux-x86_64.tar.gz
- name: Build
run: |
cd ${GITHUB_WORKSPACE}
mkdir -p build
cd build
${HOME}/cmake-3.14.6-Linux-x86_64/bin/cmake .. -DSPFFT_BUILD_TESTS=ON -DSPFFT_GPU_BACKEND=CUDA -DSPFFT_OMP=OFF
${HOME}/cmake-3.18.0-Linux-x86_64/bin/cmake .. -DSPFFT_BUILD_TESTS=ON -DSPFFT_GPU_BACKEND=CUDA -DSPFFT_OMP=OFF
make -j2
#################
# Build with ROCm
#################
ROCM:
runs-on: ubuntu-18.04
container: adhocman/master:ubuntu18.04_rocm
runs-on: ubuntu-22.04
container: rocm/dev-ubuntu-22.04:5.3-complete

steps:
# Checks-out your repository under $GITHUB_WORKSPACE
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- name: Install dependencies
run: |
apt-get update
DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get install -y libfftw3-dev make g++ mpi-default-dev wget git make
cd ${HOME} && wget https://github.com/Kitware/CMake/releases/download/v3.21.0/cmake-3.21.0-linux-x86_64.tar.gz && tar -xzvf cmake-3.21.0-linux-x86_64.tar.gz
- name: Build
run: |
cd ${GITHUB_WORKSPACE}
mkdir -p build
cd build
/root/cmake-3.11.4-Linux-x86_64/bin/cmake .. -DSPFFT_BUILD_TESTS=ON -DSPFFT_GPU_BACKEND=ROCM -DCMAKE_PREFIX_PATH=/opt/rocm
${HOME}/cmake-3.21.0-linux-x86_64/bin/cmake .. -DSPFFT_BUILD_TESTS=ON -DSPFFT_GPU_BACKEND=ROCM -DCMAKE_PREFIX_PATH=/opt/rocm
make -j2
18 changes: 18 additions & 0 deletions .readthedocs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

version: 2

# Build the HTML documentation with Sphinx using the project's conf.py.
sphinx:
  configuration: docs/source/conf.py

# No extra downloadable formats (PDF/ePub) are produced.
formats: []

# Build environment used by Read the Docs.
build:
  os: ubuntu-22.04
  tools:
    python: "3.11"

# Python packages required to build the docs (see docs/requirements.txt).
python:
  install:
    - requirements: docs/requirements.txt
42 changes: 19 additions & 23 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.11 FATAL_ERROR) # 3.11 to avoid issues with OpenMP + CUDA
project(SpFFT LANGUAGES CXX VERSION 1.0.6)
cmake_minimum_required(VERSION 3.18 FATAL_ERROR) # 3.18 for C++17
project(SpFFT LANGUAGES CXX VERSION 1.1.0)
set(SPFFT_SO_VERSION 1)
set(SPFFT_VERSION ${PROJECT_VERSION})

@@ -22,26 +22,15 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
endif()

# set language and standard
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CUDA_STANDARD 11)

# set CUDA flags
if(NOT CMAKE_CUDA_FLAGS_RELEASE)
set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "Flags used by CUDA compiler at given build type." FORCE)
endif()
if(NOT CMAKE_CUDA_FLAGS_RELWITHDEBINFO)
set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG" CACHE STRING "Flags used by CUDA compiler at given build type." FORCE)
endif()
if(NOT CMAKE_CUDA_FLAGS_MINSIZEREL)
set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Os -DNDEBUG" CACHE STRING "Flags used by CUDA compiler at given build type." FORCE)
endif()
if(NOT CMAKE_CUDA_FLAGS_DEBUG)
set(CMAKE_CUDA_FLAGS_DEBUG "-g" CACHE STRING "Flags used by CUDA compiler at given build type." FORCE)
endif()
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_HIP_STANDARD 17)

#add local module path
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/cmake/modules)

include(CMakeDependentOption)

# Options
option(SPFFT_STATIC "Compile as static library" OFF)
option(SPFFT_OMP "Compile with OpenMP support" ON)
@@ -51,6 +40,11 @@ option(SPFFT_BUILD_TESTS "Build tests" OFF)
option(SPFFT_SINGLE_PRECISION "Enable single precision support" OFF)
option(SPFFT_INSTALL "Enable CMake install commands" ON)
option(SPFFT_FORTRAN "Compile fortran module" OFF)
option(SPFFT_BUNDLED_LIBS "Use bundled libraries for building tests" ON)

cmake_dependent_option(SPFFT_BUNDLED_GOOGLETEST "Use bundled googletest lib" ON "SPFFT_BUNDLED_LIBS" OFF)
cmake_dependent_option(SPFFT_BUNDLED_JSON "Use bundled json lib" ON "SPFFT_BUNDLED_LIBS" OFF)
cmake_dependent_option(SPFFT_BUNDLED_CLI11 "Use bundled CLI11 lib" ON "SPFFT_BUNDLED_LIBS" OFF)

set(SPFFT_GPU_BACKEND "OFF" CACHE STRING "GPU backend")
set_property(CACHE SPFFT_GPU_BACKEND PROPERTY STRINGS
@@ -129,6 +123,10 @@ endif()

# ROCM
if(SPFFT_ROCM)
cmake_minimum_required(VERSION 3.21 FATAL_ERROR) # hip support only added in 3.21

enable_language(HIP)

find_package(hip CONFIG REQUIRED)
find_package(rocfft CONFIG REQUIRED)
find_package(hipfft CONFIG) # hipfft within rocfft is deprecated. Use separate hipfft if available (not required).
@@ -144,11 +142,9 @@ if(SPFFT_ROCM)

list(APPEND SPFFT_EXTERNAL_LIBS hip::host roc::rocfft)

# FindHIP module provides compilation command for GPU code
find_package(HIP MODULE REQUIRED)
if(NOT HIP_HCC_FLAGS)
message(STATUS "Using default AMD gpu targets: gfx803, gfx900, gfx906. Set HIP_HCC_FLAGS to override.")
set(HIP_HCC_FLAGS ${HIP_HCC_FLAGS} --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906)
# Previously used option for flags.
if(HIP_HCC_FLAGS)
message(WARNING "HIP_HCC_FLAGS has no effect. Use CMAKE_HIP_FLAGS for flags and CMAKE_HIP_ARCHITECTURES for arch instead.")
endif()
endif()

12 changes: 7 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -38,16 +38,16 @@ A transform can be computed in-place and out-of-place. Additionally, an internally
Documentation can be found [here](https://spfft.readthedocs.io/en/latest/).

## Requirements
- C++ Compiler with C++11 support. Supported compilers are:
- GCC 6 and later
- C++ Compiler with C++17 support. Supported compilers are:
- GCC 7 and later
- Clang 5 and later
- ICC 19.0 and later
- CMake 3.11 and later
- CMake 3.18 and later (3.21 for ROCm)
- Library providing a FFTW 3.x interface (FFTW3 or Intel MKL)
- For multi-threading: OpenMP support by the compiler
- For compilation with GPU support:
- CUDA 9.0 and later for Nvidia hardware
- ROCm 3.5 and later for AMD hardware
- CUDA 11.0 and later for Nvidia hardware
- ROCm 5.0 and later for AMD hardware

## Installation
The build system follows the standard CMake workflow. Example:
@@ -71,7 +71,9 @@ make -j8 install
| SPFFT_BUILD_TESTS | OFF | Build test executables for development purposes |
| SPFFT_INSTALL | ON | Add library to install target |
| SPFFT_FORTRAN | OFF | Build Fortran interface module |
| SPFFT_BUNDLED_LIBS | ON | Download required libraries for building tests |

**_NOTE:_** When compiling with CUDA or ROCM (HIP), the standard `CMAKE_CUDA_ARCHITECTURES` and `CMAKE_HIP_ARCHITECTURES` options should be defined as well. `HIP_HCC_FLAGS` is no longer in use.

## Examples
Further examples for C++, C and Fortran can be found in the "examples" folder.
712 changes: 0 additions & 712 deletions cmake/modules/FindHIP.cmake

This file was deleted.

201 changes: 0 additions & 201 deletions cmake/modules/FindHIP/run_hipcc.cmake

This file was deleted.

69 changes: 0 additions & 69 deletions cmake/modules/FindHIP/run_make2cmake.cmake

This file was deleted.

1 change: 1 addition & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
breathe
sphinx-rtd-theme
2 changes: 1 addition & 1 deletion docs/source/details.rst
Original file line number Diff line number Diff line change
@@ -15,7 +15,7 @@ Transform Definition

Complex Number Format
---------------------
SpFFT always assumes an interleaved format in double or single precision. The alignment of memory provided for space domain data is guaranteed to fulfill to the requirements for std::complex (for C++11), C complex types and GPU complex types of CUDA or ROCm.
SpFFT always assumes an interleaved format in double or single precision. The alignment of memory provided for space domain data is guaranteed to fulfill the requirements for std::complex (for C++17), C complex types and GPU complex types of CUDA or ROCm.

Indexing
--------
26 changes: 18 additions & 8 deletions docs/source/installation.rst
Original file line number Diff line number Diff line change
@@ -3,20 +3,26 @@ Installation

Requirements
------------
* C++ Compiler with C++11 support. Supported compilers are:

* GCC 6 and later
* C++ Compiler with C++17 support. Supported compilers are:

* GCC 7 and later

* Clang 5 and later

* ICC 19.0 and later

* CMake 3.18 and later (3.21 for ROCm)

* Library providing a FFTW 3.x interface (FFTW3 or Intel MKL)

* For multi-threading: OpenMP support by the compiler

- CMake 3.11 and later
- Library providing a FFTW 3.x interface (FFTW3 or Intel MKL)
- For multi-threading: OpenMP support by the compiler
- For compilation with GPU support:
* For compilation with GPU support:

* CUDA 9.0 and later for Nvidia hardware
* ROCm 3.5 and later for AMD hardware
* CUDA 11.0 and later for Nvidia hardware

* ROCm 5.0 and later for AMD hardware


Build
@@ -32,6 +38,10 @@ Example:
cmake .. -DSPFFT_OMP=ON -DSPFFT_MPI=ON -DSPFFT_GPU_BACKEND=CUDA -DSPFFT_SINGLE_PRECISION=OFF -DCMAKE_INSTALL_PREFIX=/usr/local
make -j8 install
NOTE: When compiling with CUDA or ROCM (HIP), the standard `CMAKE_CUDA_ARCHITECTURES` and `CMAKE_HIP_ARCHITECTURES` options should be defined as well. `HIP_HCC_FLAGS` is no longer in use.


CMake options
-------------
====================== ======= =============================================================
24 changes: 9 additions & 15 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -40,6 +40,11 @@ if(SPFFT_CUDA OR SPFFT_ROCM)
transpose/transpose_mpi_unbuffered_gpu.cpp
)
endif()

if(SPFFT_ROCM)
set_source_files_properties(${SPFFT_GPU_KERNELS} PROPERTIES LANGUAGE HIP)
endif()

list(APPEND SPFFT_SOURCE_FILES ${SPFFT_GPU_KERNELS})
endif()

@@ -51,24 +56,10 @@ if(SPFFT_MPI)
)
endif()

if(SPFFT_ROCM)
set(HIP_HCC_FLAGS ${HIP_HCC_FLAGS} -fno-gpu-rdc)
set(HIP_HCC_FLAGS_RELEASE ${HIP_HCC_FLAGS_RELEASE} -Wno-everything)
if(CMAKE_CXX_STANDARD)
set(HIP_HCC_FLAGS ${HIP_HCC_FLAGS} -std=gnu++${CMAKE_CXX_STANDARD})
endif()
endif()


# Creates library with given name. All common target modifications should be done here.
macro(spfft_create_library _TARGET_NAME)
# create target
if(SPFFT_ROCM)
# macro from FindHIP package, which compiles all .cu files with hipcc and cpp files with the set c++ compiler
HIP_ADD_LIBRARY(${_TARGET_NAME} ${SPFFT_LIBRARY_TYPE} ${SPFFT_SOURCE_FILES})
else()
add_library(${_TARGET_NAME} ${SPFFT_LIBRARY_TYPE} ${SPFFT_SOURCE_FILES})
endif()
add_library(${_TARGET_NAME} ${SPFFT_LIBRARY_TYPE} ${SPFFT_SOURCE_FILES})

# set version
set_property(TARGET ${_TARGET_NAME} PROPERTY VERSION ${SPFFT_VERSION})
@@ -79,6 +70,9 @@ macro(spfft_create_library _TARGET_NAME)
set_property(TARGET ${_TARGET_NAME} PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS OFF)
set_property(TARGET ${_TARGET_NAME} PROPERTY CUDA_SEPARABLE_COMPILATION OFF)
endif()
if(SPFFT_ROCM)
target_compile_options(${_TARGET_NAME} PRIVATE $<$<COMPILE_LANGUAGE:HIP>:-fno-gpu-rdc>)
endif()

# Don't export any symbols of external static libaries. Only works on linux.
if(UNIX AND NOT APPLE)
2 changes: 1 addition & 1 deletion src/execution/execution_gpu.cpp
Original file line number Diff line number Diff line change
@@ -405,7 +405,7 @@ auto ExecutionGPU<T>::synchronize(SpfftExecType mode) -> void {
endEvent_.record(stream_.get());
endEvent_.stream_wait(externalStream_);
} else {
gpu::stream_synchronize(stream_.get());
gpu::check_status(gpu::stream_synchronize(stream_.get()));
}
}

4 changes: 3 additions & 1 deletion src/gpu_util/gpu_device_guard.hpp
Original file line number Diff line number Diff line change
@@ -31,6 +31,7 @@
#include "spfft/config.h"
#if defined(SPFFT_CUDA) || defined(SPFFT_ROCM)
#include <memory>
#include <tuple>
#include "gpu_util/gpu_runtime_api.hpp"
#include "spfft/exceptions.hpp"
namespace spfft {
@@ -51,7 +52,8 @@ class GPUDeviceGuard {

~GPUDeviceGuard() {
if (targetDeviceId_ != originalDeviceId_) {
gpu::set_device(originalDeviceId_); // no check to avoid throw exeception in destructor
std::ignore =
gpu::set_device(originalDeviceId_); // no check to avoid throw exeception in destructor
}
}

3 changes: 2 additions & 1 deletion src/gpu_util/gpu_event_handle.hpp
Original file line number Diff line number Diff line change
@@ -31,6 +31,7 @@
#include "spfft/config.h"
#if defined(SPFFT_CUDA) || defined(SPFFT_ROCM)
#include <memory>
#include <tuple>
#include "gpu_util/gpu_runtime_api.hpp"
#include "spfft/exceptions.hpp"

@@ -45,7 +46,7 @@ class GPUEventHandle {
gpu::check_status(gpu::event_create_with_flags(&event, flag));

event_ = std::shared_ptr<gpu::EventType>(new gpu::EventType(event), [](gpu::EventType* ptr) {
gpu::event_destroy(*ptr);
std::ignore = gpu::event_destroy(*ptr);
delete ptr;
});
};
6 changes: 6 additions & 0 deletions src/gpu_util/gpu_fft_api.hpp
Original file line number Diff line number Diff line change
@@ -35,7 +35,13 @@
#define GPU_FFT_PREFIX(val) cufft##val

#elif defined(SPFFT_ROCM)

#if __has_include(<hipfft/hipfft.h>)
#include <hipfft/hipfft.h>
#else
#include <hipfft.h>
#endif

#define GPU_FFT_PREFIX(val) hipfft##val
#endif

17 changes: 3 additions & 14 deletions src/gpu_util/gpu_pointer_translation.hpp
Original file line number Diff line number Diff line change
@@ -30,6 +30,7 @@

#include <gpu_util/gpu_runtime_api.hpp>
#include <utility>
#include <tuple>

#include "spfft/config.h"

@@ -45,7 +46,7 @@ auto translate_gpu_pointer(const T* inputPointer) -> std::pair<const T*, const T
auto status = gpu::pointer_get_attributes(&attr, static_cast<const void*>(inputPointer));

if (status != gpu::status::Success) {
gpu::get_last_error(); // clear error from cache
std::ignore = gpu::get_last_error(); // clear error from cache
// Invalid value is always indicated before CUDA 11 for valid host pointers, which have not been
// registered. -> Don't throw error in this case.
if (status != gpu::status::ErrorInvalidValue) gpu::check_status(status);
@@ -54,29 +55,17 @@ auto translate_gpu_pointer(const T* inputPointer) -> std::pair<const T*, const T
std::pair<const T*, const T*> ptrPair{nullptr, nullptr};

// get memory type - cuda 10 changed attribute name
#if defined(SPFFT_CUDA) && (CUDART_VERSION >= 10000)
#if (defined(SPFFT_CUDA) && (CUDART_VERSION >= 10000)) || (defined(SPFFT_ROCM) && (HIP_VERSION_MAJOR >= 6))
auto memoryType = attr.type;
#else
auto memoryType = attr.memoryType;
#endif

#if defined(SPFFT_ROCM) && (HIP_VERSION < 310)
// Workaround due to bug with HIP when parsing pointers with offset from allocated memory start.
// Fixed in ROCm 3.10.
if (memoryType != gpu::flag::MemoryTypeDevice) {
ptrPair.first = inputPointer;
} else {
ptrPair.second = inputPointer;
}

#else

if (memoryType != gpu::flag::MemoryTypeDevice) {
ptrPair.first = attr.hostPointer ? static_cast<const T*>(attr.hostPointer) : inputPointer;
} else {
ptrPair.second = static_cast<const T*>(attr.devicePointer);
}
#endif

return ptrPair;
}
3 changes: 2 additions & 1 deletion src/gpu_util/gpu_stream_handle.hpp
Original file line number Diff line number Diff line change
@@ -31,6 +31,7 @@
#include "spfft/config.h"
#if defined(SPFFT_CUDA) || defined(SPFFT_ROCM)
#include <memory>
#include <tuple>
#include "gpu_util/gpu_runtime_api.hpp"
#include "spfft/exceptions.hpp"
namespace spfft {
@@ -50,7 +51,7 @@ class GPUStreamHandle {

stream_ =
std::shared_ptr<gpu::StreamType>(new gpu::StreamType(rawStream), [](gpu::StreamType* ptr) {
gpu::stream_destroy(*ptr);
std::ignore = gpu::stream_destroy(*ptr);
delete ptr;
});
};
5 changes: 3 additions & 2 deletions src/memory/gpu_array.hpp
Original file line number Diff line number Diff line change
@@ -30,6 +30,7 @@
#define SPFFT_GPU_ARRAY_HPP

#include <cassert>
#include <tuple>
#include "gpu_util/gpu_runtime_api.hpp"
#include "spfft/config.h"
#include "util/common_types.hpp"
@@ -88,7 +89,7 @@ template <typename T>
GPUArray<T>::~GPUArray() {
if (data_) {
// don't check error to avoid throwing exception in destructor
gpu::free(data_);
std::ignore = gpu::free(data_);
data_ = nullptr;
size_ = 0;
}
@@ -104,7 +105,7 @@ GPUArray<T>::GPUArray(GPUArray&& array) noexcept
template <typename T>
auto GPUArray<T>::operator=(GPUArray&& array) noexcept -> GPUArray& {
if (data_) {
gpu::free(data_);
std::ignore = gpu::free(data_);
}
data_ = array.data_;
size_ = array.size_;
3 changes: 2 additions & 1 deletion src/memory/host_array.hpp
Original file line number Diff line number Diff line change
@@ -33,6 +33,7 @@
#include <memory>
#include <type_traits>
#include <vector>
#include <tuple>
#include "gpu_util/gpu_runtime_api.hpp"
#include "memory/aligned_allocation.hpp"
#include "spfft/config.h"
@@ -226,7 +227,7 @@ template <typename T>
auto HostArray<T>::unpin_memory() noexcept -> void {
#if defined(SPFFT_CUDA) || defined(SPFFT_ROCM)
if (pinned_) {
gpu::host_unregister((void*)data_);
std::ignore = gpu::host_unregister((void*)data_);
pinned_ = false;
}
#endif
93 changes: 41 additions & 52 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,72 +1,60 @@

if(SPFFT_BUILD_TESTS)
cmake_minimum_required(VERSION 3.11 FATAL_ERROR) # git fetch module requires at least 3.11
cmake_minimum_required(VERSION 3.14 FATAL_ERROR) # FetchContent_MakeAvailable requires at least 3.14

# update time stamps when using FetchContent
if(POLICY CMP0135)
cmake_policy(SET CMP0135 NEW)
endif()

set(BUILD_GMOCK OFF CACHE BOOL "")
set(INSTALL_GTEST OFF CACHE BOOL "")
mark_as_advanced(BUILD_GMOCK INSTALL_GTEST)
include(FetchContent)

# add googletest
FetchContent_Declare(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG release-1.8.1
)
FetchContent_GetProperties(googletest)
if(NOT googletest_POPULATED)
message(STATUS "Downloading Google Test repository...")
FetchContent_Populate(googletest)
endif()
add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR})

# add gtest_mpi
FetchContent_Declare(
gtest_mpi
GIT_REPOSITORY https://github.com/AdhocMan/gtest_mpi.git
GIT_TAG v1.0.0
)
FetchContent_GetProperties(gtest_mpi)
if(NOT gtest_mpi_POPULATED)
message(STATUS "Downloading Google Test MPI extension repository...")
FetchContent_Populate(gtest_mpi)
if(SPFFT_BUNDLED_GOOGLETEST)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/refs/tags/v1.14.0.tar.gz
URL_MD5 c8340a482851ef6a3fe618a082304cfc
)
FetchContent_MakeAvailable(googletest)
else()
find_package(googletest CONFIG REQUIRED)
endif()
add_subdirectory(${gtest_mpi_SOURCE_DIR} ${gtest_mpi_BINARY_DIR})
list(APPEND SPFFT_TEST_LIBRARIES gtest_main)

# add command line parser
FetchContent_Declare(
cli11
GIT_REPOSITORY https://github.com/CLIUtils/CLI11.git
GIT_TAG v1.7.1
)
FetchContent_GetProperties(cli11)
if(NOT cli11_POPULATED)
message(STATUS "Downloading CLI11 command line parser repository...")
FetchContent_Populate(cli11)
if(SPFFT_BUNDLED_CLI11)
FetchContent_Declare(
cli11
URL https://github.com/CLIUtils/CLI11/archive/refs/tags/v2.3.2.tar.gz
URL_MD5 b80cb645dee25982110b068b426363ff
)
FetchContent_MakeAvailable(cli11)
else()
find_package(CLI11 CONFIG REQUIRED)
endif()
list(APPEND SPFFT_EXTERNAL_INCLUDE_DIRS ${cli11_SOURCE_DIR}/include)
list(APPEND SPFFT_TEST_LIBRARIES CLI11::CLI11)

# add json parser
set(JSON_Install OFF CACHE BOOL "")
FetchContent_Declare(
json
GIT_REPOSITORY https://github.com/nlohmann/json.git
GIT_TAG v3.6.1
)
FetchContent_GetProperties(json)
if(NOT json_POPULATED)
message(STATUS "Downloading json repository...")
FetchContent_Populate(json)
# add json parser
if(SPFFT_BUNDLED_JSON)
FetchContent_Declare(
json
URL https://github.com/nlohmann/json/archive/refs/tags/v3.11.2.tar.gz
URL_MD5 e8d56bc54621037842ee9f0aeae27746
)
FetchContent_MakeAvailable(json)
else()
find_package(nlohmann_json CONFIG REQUIRED)
endif()
set(JSON_BuildTests OFF CACHE INTERNAL "")
add_subdirectory(${json_SOURCE_DIR} ${json_BINARY_DIR})
list(APPEND SPFFT_EXTERNAL_LIBS nlohmann_json::nlohmann_json)


list(APPEND SPFFT_INCLUDE_DIRS ${PROJECT_SOURCE_DIR}/tests)

# benchmark executable
add_executable(benchmark programs/benchmark.cpp)
target_link_libraries(benchmark PRIVATE spfft_test ${SPFFT_EXTERNAL_LIBS})
target_link_libraries(benchmark PRIVATE spfft_test ${SPFFT_EXTERNAL_LIBS} CLI11::CLI11 nlohmann_json::nlohmann_json)
target_include_directories(benchmark PRIVATE ${SPFFT_INCLUDE_DIRS} ${SPFFT_EXTERNAL_INCLUDE_DIRS})

# test executables
@@ -77,19 +65,20 @@ if(SPFFT_BUILD_TESTS)
local_tests/test_fftw_prop_hash.cpp
local_tests/test_local_transform.cpp
)
target_link_libraries(run_local_tests PRIVATE gtest_main gtest_mpi)
target_link_libraries(run_local_tests PRIVATE gtest_main)
target_link_libraries(run_local_tests PRIVATE spfft_test ${SPFFT_EXTERNAL_LIBS})
target_include_directories(run_local_tests PRIVATE ${SPFFT_INCLUDE_DIRS} ${SPFFT_EXTERNAL_INCLUDE_DIRS})

if(SPFFT_MPI)
add_executable(run_mpi_tests
run_mpi_tests.cpp
gtest_mpi.cpp
mpi_tests/test_transform.cpp
mpi_tests/test_multi_transform.cpp
mpi_tests/test_transpose.cpp
mpi_tests/test_transpose_gpu.cpp
)
target_link_libraries(run_mpi_tests PRIVATE gtest_main gtest_mpi)
target_link_libraries(run_mpi_tests PRIVATE gtest_main)
target_link_libraries(run_mpi_tests PRIVATE spfft_test ${SPFFT_EXTERNAL_LIBS})
target_include_directories(run_mpi_tests PRIVATE ${SPFFT_INCLUDE_DIRS} ${SPFFT_EXTERNAL_INCLUDE_DIRS})
endif()
216 changes: 216 additions & 0 deletions tests/gtest_mpi.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
#include "gtest_mpi.hpp"
#include <gtest/gtest.h>
#include <iostream>
#include <memory>
#include <mpi.h>
#include <stdexcept>
#include <vector>

namespace gtest_mpi {

namespace {

// Test event listener that makes GoogleTest output MPI-aware: only rank 0
// forwards events to the regular result printer, while all other ranks
// buffer their failures so they can later be gathered and reported on
// rank 0 (see GatherPartResults()).
class MPIListener : public testing::EmptyTestEventListener {
public:
  using UnitTest = testing::UnitTest;
  using TestCase = testing::TestCase;
  using TestInfo = testing::TestInfo;
  using TestPartResult = testing::TestPartResult;
  using TestSuite = testing::TestSuite;

  // Takes ownership of the given listener (typically the default result
  // printer). On every rank except 0 the listener is dropped, silencing
  // console output there. MPI_COMM_WORLD is duplicated so the gather
  // traffic cannot interfere with communication done by the tests.
  MPIListener(testing::TestEventListener *listener)
      : listener_(listener), comm_(MPI_COMM_WORLD), gather_called_(false) {
    MPI_Comm_dup(MPI_COMM_WORLD, &comm_);
    int rank;
    MPI_Comm_rank(comm_, &rank);
    if (rank != 0)
      listener_.reset();
  }

  void OnTestProgramStart(const UnitTest &u) override {
    if (listener_)
      listener_->OnTestProgramStart(u);
  }

  void OnTestProgramEnd(const UnitTest &u) override {
    if (listener_)
      listener_->OnTestProgramEnd(u);
  }

  void OnTestStart(const TestInfo &test_info) override {
    // Each test must trigger a gather; reset the flag checked in OnTestEnd().
    gather_called_ = false;
    if (listener_)
      listener_->OnTestStart(test_info);
  }

  // Rank 0 prints failures directly. Other ranks serialize failure
  // details (file name, line number, message) into flat buffers that
  // are sent to rank 0 during GatherPartResults().
  void OnTestPartResult(const TestPartResult &test_part_result) override {
    if (listener_) {
      listener_->OnTestPartResult(test_part_result);
    } else if (test_part_result.type() == TestPartResult::Type::kFatalFailure ||
               test_part_result.type() ==
                   TestPartResult::Type::kNonFatalFailure) {
      // Append file name and message as '\0'-terminated segments and
      // remember their offsets within strings_.
      std::size_t fileIndex = strings_.size();
      strings_ += test_part_result.file_name();
      strings_ += '\0';

      std::size_t messageIndex = strings_.size();
      strings_ += test_part_result.message();
      strings_ += '\0';

      infos_.emplace_back(ResultInfo{test_part_result.type(), fileIndex,
                                     test_part_result.line_number(),
                                     messageIndex});
    }
  }

  // Enforces that every test case used GTEST_MPI_GUARD: the guard calls
  // GatherPartResults() at test-scope exit, which sets gather_called_.
  // Without the guard, failures on non-zero ranks would be lost and the
  // ranks could deadlock, so this aborts loudly instead.
  void OnTestEnd(const TestInfo &test_info) override {
    if(!gather_called_){
      std::cerr << "Missing GTEST_MPI_GUARD in test case!" << std::endl;
      throw std::runtime_error("Missing GTEST_MPI_GUARD in test case!");
    }

    if (listener_)
      listener_->OnTestEnd(test_info);
  }

  void OnTestIterationStart(const UnitTest &u, int it) override {
    if (listener_)
      listener_->OnTestIterationStart(u, it);
  }

  void OnEnvironmentsSetUpStart(const UnitTest &u) override {
    if (listener_)
      listener_->OnEnvironmentsSetUpStart(u);
  }

  void OnEnvironmentsSetUpEnd(const UnitTest &u) override {
    if (listener_)
      listener_->OnEnvironmentsSetUpEnd(u);
  }

  void OnTestSuiteStart(const TestSuite &t) override {
    if (listener_)
      listener_->OnTestSuiteStart(t);
  }

  void OnTestDisabled(const TestInfo &t) override {
    if (listener_)
      listener_->OnTestDisabled(t);
  }
  void OnTestSuiteEnd(const TestSuite &t) override {
    if (listener_)
      listener_->OnTestSuiteEnd(t);
  }

  void OnEnvironmentsTearDownStart(const UnitTest &u) override {
    if (listener_)
      listener_->OnEnvironmentsTearDownStart(u);
  }

  void OnEnvironmentsTearDownEnd(const UnitTest &u) override {
    if (listener_)
      listener_->OnEnvironmentsTearDownEnd(u);
  }

  void OnTestIterationEnd(const UnitTest &u, int it) override {
    if (listener_)
      listener_->OnTestIterationEnd(u, it);
  }

  // Collective over comm_: rank 0 receives the buffered failures of every
  // other rank and re-raises them locally via ADD_FAILURE_AT, tagged with
  // the originating rank; all other ranks send their buffers. Must be
  // executed on all ranks (done by the guard from CreateTestGuard()).
  void GatherPartResults() {
    gather_called_ = true;
    int rank, n_proc;
    MPI_Comm_rank(comm_, &rank);
    MPI_Comm_size(comm_, &n_proc);

    if (rank == 0) {
      decltype(infos_) remoteInfos;
      decltype(strings_) remoteStrings;
      for (int r = 1; r < n_proc; ++r) {
        MPI_Status status;
        int count;

        // Result infos: probe first to size the receive buffer, then
        // receive the raw ResultInfo records as bytes.
        MPI_Probe(r, 0, comm_, &status);
        MPI_Get_count(&status, MPI_CHAR, &count);
        auto numResults = static_cast<std::size_t>(count) /
                          sizeof(decltype(remoteInfos)::value_type);
        remoteInfos.resize(numResults);
        MPI_Recv(remoteInfos.data(), count, MPI_BYTE, r, 0, comm_,
                 MPI_STATUS_IGNORE);

        // Only continue if any results (matches the sender, which skips
        // the string message when it has no failures).
        if (numResults) {
          // Get strings
          MPI_Probe(r, 0, comm_, &status);
          MPI_Get_count(&status, MPI_CHAR, &count);
          auto stringSize = static_cast<std::size_t>(count) /
                            sizeof(decltype(remoteStrings)::value_type);
          remoteStrings.resize(stringSize);
          MPI_Recv(&remoteStrings[0], count, MPI_BYTE, r, 0, comm_,
                   MPI_STATUS_IGNORE);

          // Create error for every remote fail
          for (const auto &info : remoteInfos) {
            if (info.type == TestPartResult::Type::kFatalFailure ||
                info.type == TestPartResult::Type::kNonFatalFailure) {
              ADD_FAILURE_AT(&remoteStrings[info.fileIndex], info.lineNumber)
                  << "Rank " << r << ": " << &remoteStrings[info.messageIndex];
            }
          }
        }
      }
    } else {
      MPI_Send(infos_.data(),
               infos_.size() * sizeof(decltype(infos_)::value_type), MPI_BYTE,
               0, 0, comm_);

      // Only send string if results exist
      if (infos_.size()) {
        MPI_Send(strings_.data(),
                 strings_.size() * sizeof(decltype(strings_)::value_type),
                 MPI_BYTE, 0, 0, comm_);
      }
    }

    // Buffers are consumed per gather so results do not leak across tests.
    infos_.clear();
    strings_.clear();
  }

private:
  // One buffered failure; fileIndex/messageIndex are offsets into strings_
  // pointing at the '\0'-terminated file name and message respectively.
  struct ResultInfo {
    TestPartResult::Type type;
    std::size_t fileIndex;
    int lineNumber;
    std::size_t messageIndex;
  };

  std::unique_ptr<testing::TestEventListener> listener_;  // non-null on rank 0 only
  MPI_Comm comm_;        // private duplicate of MPI_COMM_WORLD
  bool gather_called_;   // set by GatherPartResults(), checked in OnTestEnd()

  std::vector<ResultInfo> infos_;  // buffered failures (non-zero ranks)
  std::string strings_;            // '\0'-separated file names and messages
};

MPIListener *globalMPIListener = nullptr;

} // namespace

// Initialize GoogleTest and install the MPI-aware result listener.
// MPI_Init must already have been called by the caller.
void InitGoogleTestMPI(int *argc, char **argv) {
  ::testing::InitGoogleTest(argc, argv);

  auto &listeners = ::testing::UnitTest::GetInstance()->listeners();

  // Take ownership of the default printer and wrap it, so that console
  // output is produced on rank 0 only.
  auto *defaultPrinter = listeners.Release(listeners.default_result_printer());
  globalMPIListener = new MPIListener(defaultPrinter);
  listeners.Append(globalMPIListener);
}

// Returns a guard whose destructor triggers the collective gather of
// buffered test part results across all ranks.
TestGuard CreateTestGuard() {
  TestGuard guard{};
  guard.func = []() { globalMPIListener->GatherPartResults(); };
  return guard;
}

} // namespace gtest_mpi
28 changes: 28 additions & 0 deletions tests/gtest_mpi.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef GTEST_MPI_HPP
#define GTEST_MPI_HPP

#include <gtest/gtest.h>

namespace gtest_mpi {
// Internal helper struct
// Internal helper struct: runs the stored callback (if any) when the
// guard goes out of scope.
struct TestGuard {
  // Callback invoked on destruction; nullptr means no-op.
  void (*func)() = nullptr;

  ~TestGuard() {
    if (func != nullptr) {
      func();
    }
  }
};

// Initialize GoogleTest and MPI functionality. MPI_Init has to be called before.
void InitGoogleTestMPI(int *argc, char **argv);

// Create a test guard, which has to be placed in all test cases.
TestGuard CreateTestGuard();

} // namespace gtest_mpi

// Helper macro for creating a test guard within test cases.
#define GTEST_MPI_GUARD auto gtest_mpi_guard__LINE__ = ::gtest_mpi::CreateTestGuard();

#endif
4 changes: 2 additions & 2 deletions tests/local_tests/test_local_transform.cpp
Original file line number Diff line number Diff line change
@@ -92,7 +92,7 @@ static auto param_type_names(
#define TEST_PROCESSING_UNITS SpfftProcessingUnitType::SPFFT_PU_HOST
#endif

INSTANTIATE_TEST_CASE_P(FullTest, TestLocalTransform,
INSTANTIATE_TEST_SUITE_P(FullTest, TestLocalTransform,
::testing::Combine(::testing::Values(SpfftExchangeType::SPFFT_EXCH_DEFAULT),
::testing::Values(TEST_PROCESSING_UNITS),
::testing::Values(1, 2, 11, 12, 13, 100),
@@ -101,7 +101,7 @@ INSTANTIATE_TEST_CASE_P(FullTest, TestLocalTransform,
::testing::Values(false)),
param_type_names);

INSTANTIATE_TEST_CASE_P(CenteredIndicesTest, TestLocalTransform,
INSTANTIATE_TEST_SUITE_P(CenteredIndicesTest, TestLocalTransform,
::testing::Combine(::testing::Values(SpfftExchangeType::SPFFT_EXCH_DEFAULT),
::testing::Values(TEST_PROCESSING_UNITS),
::testing::Values(1, 2, 11, 100),
4 changes: 4 additions & 0 deletions tests/mpi_tests/test_multi_transform.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
#include <fftw3.h>

#include <algorithm>
#include <memory>
#include <random>
#include <tuple>
#include <utility>
#include <vector>

#include "gtest/gtest.h"
#include "gtest_mpi.hpp"
#include "memory/array_view_utility.hpp"
#include "memory/host_array.hpp"
#include "memory/host_array_view.hpp"
@@ -17,6 +20,7 @@
#include "util/common_types.hpp"

TEST(MPIMultiTransformTest, BackwardsForwards) {
GTEST_MPI_GUARD
try {
MPICommunicatorHandle comm(MPI_COMM_WORLD);
const std::vector<double> zStickDistribution(comm.size(), 1.0);
15 changes: 13 additions & 2 deletions tests/mpi_tests/test_transform.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
#include "test_util/test_transform.hpp"

#include <fftw3.h>

#include <algorithm>
#include <memory>
#include <random>
#include <tuple>
#include <utility>
#include <vector>

#include "gtest/gtest.h"
#include "gtest_mpi.hpp"
#include "memory/array_view_utility.hpp"
#include "memory/host_array.hpp"
#include "memory/host_array_view.hpp"
@@ -36,6 +40,7 @@ class MPITransformTest : public TransformTest {
Grid grid_;
};
TEST_P(MPITransformTest, ForwardUniformDistribution) {
GTEST_MPI_GUARD
try {
std::vector<double> zStickDistribution(comm_size(), 1.0);
std::vector<double> xyPlaneDistribution(comm_size(), 1.0);
@@ -47,6 +52,7 @@ TEST_P(MPITransformTest, ForwardUniformDistribution) {
}

TEST_P(MPITransformTest, BackwardAllOneRank) {
GTEST_MPI_GUARD
try {
std::vector<double> zStickDistribution(comm_size(), 0.0);
zStickDistribution[0] = 1.0;
@@ -61,6 +67,7 @@ TEST_P(MPITransformTest, BackwardAllOneRank) {
}

TEST_P(MPITransformTest, ForwardAllOneRank) {
GTEST_MPI_GUARD
try {
std::vector<double> zStickDistribution(comm_size(), 0.0);
zStickDistribution[0] = 1.0;
@@ -75,6 +82,7 @@ TEST_P(MPITransformTest, ForwardAllOneRank) {
}

TEST_P(MPITransformTest, BackwardAllOneRankPerSide) {
GTEST_MPI_GUARD
try {
std::vector<double> zStickDistribution(comm_size(), 0.0);
zStickDistribution[0] = 1.0;
@@ -89,6 +97,7 @@ TEST_P(MPITransformTest, BackwardAllOneRankPerSide) {
}

TEST_P(MPITransformTest, ForwardAllOneRankPerSide) {
GTEST_MPI_GUARD
try {
std::vector<double> zStickDistribution(comm_size(), 0.0);
zStickDistribution[0] = 1.0;
@@ -103,6 +112,7 @@ TEST_P(MPITransformTest, ForwardAllOneRankPerSide) {
}

TEST_P(MPITransformTest, R2CUniformDistribution) {
GTEST_MPI_GUARD
try {
std::vector<double> xyPlaneDistribution(comm_size(), 1.0);
test_r2c(xyPlaneDistribution);
@@ -113,6 +123,7 @@ TEST_P(MPITransformTest, R2CUniformDistribution) {
}

TEST_P(MPITransformTest, R2COneRankAllPlanes) {
GTEST_MPI_GUARD
try {
std::vector<double> xyPlaneDistribution(comm_size(), 0.0);
xyPlaneDistribution[0] = 1.0;
@@ -170,7 +181,7 @@ static auto param_type_names(
#define TEST_PROCESSING_UNITS SpfftProcessingUnitType::SPFFT_PU_HOST
#endif

INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_SUITE_P(
FullTest, MPITransformTest,
::testing::Combine(::testing::Values(SpfftExchangeType::SPFFT_EXCH_BUFFERED,
SpfftExchangeType::SPFFT_EXCH_COMPACT_BUFFERED,
@@ -182,7 +193,7 @@ INSTANTIATE_TEST_CASE_P(
::testing::Values(1, 2, 11, 12, 13, 100), ::testing::Values(false)),
param_type_names);

INSTANTIATE_TEST_CASE_P(CenteredIndicesTest, MPITransformTest,
INSTANTIATE_TEST_SUITE_P(CenteredIndicesTest, MPITransformTest,
::testing::Combine(::testing::Values(SpfftExchangeType::SPFFT_EXCH_DEFAULT),
::testing::Values(TEST_PROCESSING_UNITS),
::testing::Values(1, 2, 11, 100),
6 changes: 6 additions & 0 deletions tests/mpi_tests/test_transpose.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
#include <fftw3.h>

#include <algorithm>
#include <memory>
#include <random>
#include <vector>

#include "gtest/gtest.h"
#include "gtest_mpi.hpp"
#include "memory/array_view_utility.hpp"
#include "memory/host_array.hpp"
#include "memory/host_array_view.hpp"
@@ -120,6 +123,7 @@ static void check_freq_domain(const HostArrayView2D<std::complex<double>>& freqV
}

TEST_F(TransposeTest, Unbuffered) {
GTEST_MPI_GUARD
auto freqXYView = create_3d_view(array2_, 0, paramPtr_->num_xy_planes(comm_.rank()),
paramPtr_->dim_x(), paramPtr_->dim_y());
auto freqView =
@@ -138,6 +142,7 @@ TEST_F(TransposeTest, Unbuffered) {
}

TEST_F(TransposeTest, CompactBuffered) {
GTEST_MPI_GUARD
auto freqXYView = create_3d_view(array2_, 0, paramPtr_->num_xy_planes(comm_.rank()),
paramPtr_->dim_x(), paramPtr_->dim_y());
auto freqView =
@@ -161,6 +166,7 @@ TEST_F(TransposeTest, CompactBuffered) {
}

TEST_F(TransposeTest, Buffered) {
GTEST_MPI_GUARD
auto freqXYView = create_3d_view(array2_, 0, paramPtr_->num_xy_planes(comm_.rank()),
paramPtr_->dim_x(), paramPtr_->dim_y());
auto freqView =
6 changes: 6 additions & 0 deletions tests/mpi_tests/test_transpose_gpu.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
#include <fftw3.h>

#include <algorithm>
#include <memory>
#include <random>
#include <vector>

#include "gtest/gtest.h"
#include "gtest_mpi.hpp"
#include "memory/array_view_utility.hpp"
#include "memory/host_array.hpp"
#include "memory/host_array_view.hpp"
@@ -130,6 +133,7 @@ static void check_freq_domain(const HostArrayView2D<std::complex<double>>& freqV
}

TEST_F(TransposeGPUTest, Buffered) {
GTEST_MPI_GUARD
auto freqXYView = create_3d_view(array2_, 0, paramPtr_->num_xy_planes(comm_.rank()),
paramPtr_->dim_y(), paramPtr_->dim_x());
auto freqXYViewGPU = create_3d_view(gpuArray2_, 0, paramPtr_->num_xy_planes(comm_.rank()),
@@ -170,6 +174,7 @@ TEST_F(TransposeGPUTest, Buffered) {
}

TEST_F(TransposeGPUTest, CompactBuffered) {
GTEST_MPI_GUARD
auto freqXYView = create_3d_view(array2_, 0, paramPtr_->num_xy_planes(comm_.rank()),
paramPtr_->dim_y(), paramPtr_->dim_x());
auto freqXYViewGPU = create_3d_view(gpuArray2_, 0, paramPtr_->num_xy_planes(comm_.rank()),
@@ -212,6 +217,7 @@ TEST_F(TransposeGPUTest, CompactBuffered) {
}

TEST_F(TransposeGPUTest, Unbuffered) {
GTEST_MPI_GUARD
auto freqXYView = create_3d_view(array2_, 0, paramPtr_->num_xy_planes(comm_.rank()),
paramPtr_->dim_y(), paramPtr_->dim_x());
auto freqXYViewGPU = create_3d_view(gpuArray2_, 0, paramPtr_->num_xy_planes(comm_.rank()),
17 changes: 8 additions & 9 deletions tests/programs/benchmark.cpp
Original file line number Diff line number Diff line change
@@ -141,17 +141,16 @@ int main(int argc, char** argv) {
app.add_option("-o", outputFileName, "Output file name")->required();
app.add_option("-m", numTransforms, "Multiple transform number")->default_val("1");
app.add_option("-s", sparsity, "Sparsity");
app.add_set("-t", transformTypeName,
std::set<std::string>{"c2c", "r2c"},
"Transform type")
app.add_option("-t", transformTypeName, "Transform type")
->check(CLI::IsMember({"c2c", "r2c"}))
->default_val("c2c");
app.add_set("-e", exchName,
std::set<std::string>{"all", "compact", "compactFloat", "buffered", "bufferedFloat",
"unbuffered"},
"Exchange type")
app.add_option("-e", exchName, "Exchange type")
->check(CLI::IsMember(
{"all", "compact", "compactFloat", "buffered", "bufferedFloat", "unbuffered"}))
->required();
app.add_set("-p", procName, std::set<std::string>{"cpu", "gpu", "gpu-gpu"},
"Processing unit. With gpu-gpu, device memory is used as input and output.")
app.add_option("-p", procName,
"Processing unit. With gpu-gpu, device memory is used as input and output.")
->check(CLI::IsMember({"cpu", "gpu", "gpu-gpu"}))
->required();
CLI11_PARSE(app, argc, argv);

25 changes: 7 additions & 18 deletions tests/run_mpi_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,29 +1,18 @@
#include <mpi.h>

#include "gtest/gtest.h"
#include "gtest_mpi/gtest_mpi.hpp"
#include "gtest_mpi.hpp"

int main(int argc, char* argv[]) {
// Initialize MPI before any call to gtest_mpi
MPI_Init(&argc, &argv);

// Intialize google test
::testing::InitGoogleTest(&argc, argv);

// Add a test envirnment, which will initialize a test communicator
// (a duplicate of MPI_COMM_WORLD)
::testing::AddGlobalTestEnvironment(new gtest_mpi::MPITestEnvironment());

auto& test_listeners = ::testing::UnitTest::GetInstance()->listeners();
int provided;
MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);

// Remove default listener and replace with the custom MPI listener
delete test_listeners.Release(test_listeners.default_result_printer());
test_listeners.Append(new gtest_mpi::PrettyMPIUnitTestResultPrinter());
gtest_mpi::InitGoogleTestMPI(&argc, argv);

// run tests
auto exit_code = RUN_ALL_TESTS();
auto status = RUN_ALL_TESTS();

// Finalize MPI before exiting
MPI_Finalize();

return exit_code;
return status;
}