From 56d563c50408ec1ece4127a467927c0876c07ce6 Mon Sep 17 00:00:00 2001 From: Alfredo Correa Date: Wed, 5 Mar 2025 21:47:52 -0800 Subject: [PATCH 1/7] fft functional --- .gitlab-ci-correaa.yml | 37 +- include/boost/multi/adaptors/cufft.hpp | 29 +- .../boost/multi/adaptors/cufft/test/cufft.cpp | 1037 +++++++++-------- include/boost/multi/adaptors/fft.hpp | 182 ++- include/boost/multi/adaptors/fftw.hpp | 33 +- .../boost/multi/adaptors/fftw/test/fft.cpp | 170 +++ 6 files changed, 907 insertions(+), 581 deletions(-) create mode 100644 include/boost/multi/adaptors/fftw/test/fft.cpp diff --git a/.gitlab-ci-correaa.yml b/.gitlab-ci-correaa.yml index 9c1eef780..1a1479fcd 100644 --- a/.gitlab-ci-correaa.yml +++ b/.gitlab-ci-correaa.yml @@ -574,40 +574,6 @@ nvhpc-22.7: - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest --output-on-failure needs: ["nvhpc"] -nvhpc-24.11 c++20 par: - stage: build - image: nvcr.io/nvidia/nvhpc:24.11-devel-cuda12.6-ubuntu24.04 # nvcr.io/nvidia/nvhpc:24.7-devel-cuda12.5-ubuntu24.04 # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nvhpc/tags - tags: - - non-shared - - large-disk-space - - x86_64 - interruptible: true - script: - - apt-get update && apt-get install --no-install-recommends -y cmake make libboost-timer-dev libboost-serialization-dev libfftw3-dev pkg-config - - /opt/nvidia/hpc_sdk/Linux_x86_64/2024/compilers/bin/nvc++ --version - - mkdir build && cd build - - CXX=/opt/nvidia/hpc_sdk/Linux_x86_64/2024/compilers/bin/nvc++ cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_STANDARD=20 -DCMAKE_CXX_FLAGS="-stdpar=multicore" - - cmake --build . --parallel 2 || cmake --build . --verbose - - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest --output-on-failure - needs: ["nvhpc"] - -nvhpc-24.11: - stage: build - image: nvcr.io/nvidia/nvhpc:24.11-devel-cuda12.6-ubuntu24.04 # nvcr.io/nvidia/nvhpc:24.7-devel-cuda12.5-ubuntu24.04 # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nvhpc/tags - tags: - - non-shared - - large-disk-space - - x86_64 - interruptible: true - script: - - apt-get update && apt-get install --no-install-recommends -y cmake make libboost-timer-dev libboost-serialization-dev libfftw3-dev pkg-config - - /opt/nvidia/hpc_sdk/Linux_x86_64/2024/compilers/bin/nvc++ --version - - mkdir build && cd build - - CXX=/opt/nvidia/hpc_sdk/Linux_x86_64/2024/compilers/bin/nvc++ cmake .. -DCMAKE_BUILD_TYPE=Release - - cmake --build . --parallel 2 || cmake --build . --verbose - - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest --output-on-failure - needs: ["nvhpc"] - nvhpc-25.1: stage: build image: nvcr.io/nvidia/nvhpc:25.1-devel-cuda12.6-ubuntu24.04 # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nvhpc/tags @@ -620,12 +586,11 @@ nvhpc-25.1: - apt-get update && apt-get install --no-install-recommends -y cmake make libboost-timer-dev libboost-serialization-dev libfftw3-dev pkg-config - /opt/nvidia/hpc_sdk/Linux_x86_64/2025/compilers/bin/nvc++ --version - mkdir build && cd build - - CXX=/opt/nvidia/hpc_sdk/Linux_x86_64/2025/compilers/bin/nvc++ cmake .. -DCMAKE_BUILD_TYPE=Release + - CXX=/opt/nvidia/hpc_sdk/Linux_x86_64/2025/compilers/bin/nvc++ cmake .. -DCMAKE_BUILD_TYPE=Release # -DCMAKE_CXX_STANDARD=20 -DCMAKE_CXX_FLAGS="-stdpar=multicore" - cmake --build . --parallel 2 || cmake --build . --verbose - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest --output-on-failure needs: ["nvhpc"] - cuda: stage: build allow_failure: false diff --git a/include/boost/multi/adaptors/cufft.hpp b/include/boost/multi/adaptors/cufft.hpp index e83aaf62f..fd3628177 100644 --- a/include/boost/multi/adaptors/cufft.hpp +++ b/include/boost/multi/adaptors/cufft.hpp @@ -11,12 +11,13 @@ #include #include +#include #include #include #include // for raw_pointer_cast -#if not defined(__HIP_ROCclr__) +#if !defined(__HIP_ROCclr__) #include #include #endif @@ -88,22 +89,22 @@ class plan { Alloc alloc_; ::size_t workSize_ = 0; void* workArea_{}; - - using complex_type = cufftDoubleComplex; cufftHandle h_{}; // TODO(correaa) put this in a unique_ptr std::array, DD + 1> which_iodims_{}; int first_howmany_{}; + + using complex_type = cufftDoubleComplex; -public: + public: using allocator_type = Alloc; plan(plan&& other) noexcept : - h_{std::exchange(other.h_, {})}, - which_iodims_{std::exchange(other.which_iodims_, {})}, - first_howmany_{std::exchange(other.first_howmany_, {})}, + alloc_{std::move(other.alloc_)}, workSize_{std::exchange(other.workSize_, {})}, workArea_{std::exchange(other.workArea_, {})}, - alloc_{std::move(other.alloc_)} + h_{std::exchange(other.h_, {})}, + which_iodims_{std::exchange(other.which_iodims_, {})}, + first_howmany_{std::exchange(other.first_howmany_, {})} {} template< @@ -136,8 +137,8 @@ class plan { std::array dims{}; auto const dims_end = std::transform(which_iodims.begin(), part, dims.begin(), [](auto elem) {return elem.second;}); - std::array howmany_dims{}; - auto const howmany_dims_end = std::transform(part, which_iodims.end() -1, howmany_dims.begin(), [](auto elem) {return elem.second;}); + // std::array howmany_dims{}; + // auto const howmany_dims_end = std::transform(part, which_iodims.end() -1, howmany_dims.begin(), [](auto elem) {return elem.second;}); which_iodims_ = which_iodims; first_howmany_ = part - which_iodims.begin(); @@ -157,7 +158,7 @@ class plan { int ostride = *(ostrides_end -1); auto onembed = ostrides; onembed.fill(0); - for(std::size_t idx = 1; idx != ion_end - ion.begin(); ++idx) { // NOLINT(altera-unroll-loops,altera-id-dependent-backward-branch) TODO(correaa) replace with algorithm + for(std::ptrdiff_t idx = 1; idx != ion_end - ion.begin(); ++idx) { // NOLINT(altera-unroll-loops,altera-id-dependent-backward-branch) TODO(correaa) replace with algorithm assert(ostrides[idx - 1] >= ostrides[idx]); assert(ostrides[idx - 1] % ostrides[idx] == 0); onembed[idx] = ostrides[idx - 1] / ostrides[idx]; @@ -286,7 +287,7 @@ class plan { private: void ExecZ2Z_(complex_type const* idata, complex_type* odata, int direction) const{ - cufftSafeCall(::cufftExecZ2Z(h_, const_cast(idata), odata, direction)); // NOLINT(cppcoreguidelines-pro-type-const-cast) wrap legacy interface + cufftSafeCall(cufftExecZ2Z(h_, const_cast(idata), odata, direction)); // NOLINT(cppcoreguidelines-pro-type-const-cast) wrap legacy interface // cudaDeviceSynchronize(); } @@ -300,7 +301,7 @@ class plan { if(first_howmany_ == DD - 1) { if( which_iodims_[first_howmany_].first) {throw std::runtime_error{"logic error"};} for(int idx = 0; idx != which_iodims_[first_howmany_].second.n; ++idx) { // NOLINT(altera-unroll-loops,altera-id-dependent-backward-branch) - ::cufftExecZ2Z( + cufftExecZ2Z( h_, const_cast(reinterpret_cast(::thrust::raw_pointer_cast(idata + idx*which_iodims_[first_howmany_].second.is))), // NOLINT(cppcoreguidelines-pro-type-const-cast,cppcoreguidelines-pro-type-reinterpret-cast) legacy interface reinterpret_cast(::thrust::raw_pointer_cast(odata + idx*which_iodims_[first_howmany_].second.os)) , // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) legacy interface @@ -315,7 +316,7 @@ class plan { if(idata == odata) {throw std::runtime_error{"complicated inplace 2"};} for(int idx = 0; idx != which_iodims_[first_howmany_].second.n; ++idx) { // NOLINT(altera-unroll-loops,altera-unroll-loops,altera-id-dependent-backward-branch) TODO(correaa) use an algorithm for(int jdx = 0; jdx != which_iodims_[first_howmany_ + 1].second.n; ++jdx) { // NOLINT(altera-unroll-loops,altera-unroll-loops,altera-id-dependent-backward-branch) TODO(correaa) use an algorithm - ::cufftExecZ2Z( + cufftExecZ2Z( h_, const_cast(reinterpret_cast(::thrust::raw_pointer_cast(idata + idx*which_iodims_[first_howmany_].second.is + jdx*which_iodims_[first_howmany_ + 1].second.is))), // NOLINT(cppcoreguidelines-pro-type-const-cast,cppcoreguidelines-pro-type-reinterpret-cast) legacy interface reinterpret_cast(::thrust::raw_pointer_cast(odata + idx*which_iodims_[first_howmany_].second.os + jdx*which_iodims_[first_howmany_ + 1].second.os)) , // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) legacy interface diff --git a/include/boost/multi/adaptors/cufft/test/cufft.cpp b/include/boost/multi/adaptors/cufft/test/cufft.cpp index 2c25e4e82..9ce3de004 100644 --- a/include/boost/multi/adaptors/cufft/test/cufft.cpp +++ b/include/boost/multi/adaptors/cufft/test/cufft.cpp @@ -21,6 +21,8 @@ #include #endif +#include + #include #include @@ -104,6 +106,15 @@ auto main() -> int { BOOST_TEST( std::abs((complex(fw_gpu[3][2]) - fw_cpu[3][2]).real()) < 1.0e-8 ); BOOST_TEST( std::abs((complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag()) < 1.0e-8 ); + + // TODO(correaa) test funcional interface for GPU + // auto const& dft = multi::fft::DFT({true, true}, in_cpu, multi::fft::forward); + + // BOOST_TEST( dft.extensions() == in_cpu.extensions() ); + // BOOST_TEST( (*dft.begin()).size() == (*in_cpu.begin()).size() ); + // BOOST_TEST( (*dft.begin()).extensions() == (*in_cpu.begin()).extensions() ); + + // multi::array const fw_cpu_out = multi::fft::DFT({true, true}, in_cpu, multi::fft::forward); } } @@ -111,458 +122,458 @@ auto main() -> int { } // #if 0 - // { - // auto fw_cpu = multi::array(extensions(in_cpu)); - // multi::fftw::dft({false, true}, in_cpu, fw_cpu, multi::fftw::forward); - - // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; - // auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); - - // BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); - // BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); - - // multi::cufft::plan<2>({false, true}, in_gpu.layout(), fw_gpu.layout()) - // .execute(in_gpu.base(), fw_gpu.base(), multi::cufft::forward); - - // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).real() == 0.0 ); - // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag() == 0.0 ); - // } - // { - // auto fw_cpu = multi::array(extensions(in_cpu)); - // multi::fftw::dft({false, true}, in_cpu, fw_cpu, multi::fftw::forward); - - // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; - // auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); - - // BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); - // BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); - - // for(int i = 0; i != in_gpu.size(); ++i) { - // multi::cufft::plan<1>({true}, in_gpu[i].layout(), fw_gpu[i].layout()) - // .execute(in_gpu[i].base(), fw_gpu[i].base(), multi::cufft::forward); - // } - - // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).real() == 0.0 ); - // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag() == 0.0 ); - // } - // { - // auto fw_cpu = multi::array(extensions(in_cpu)); - // multi::fftw::dft({false, true}, in_cpu, fw_cpu, multi::fftw::forward); - - // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; - // auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); - // auto fw_gpu2 = multi::thrust::cuda::array(extensions(in_gpu)); - // auto fw_gpu3 = multi::thrust::cuda::array(extensions(in_gpu)); - - // BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); - // BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); - - // for(int i = 0; i != in_gpu.size(); ++i) { - // multi::cufft::plan<1>({true}, in_gpu[i].layout(), fw_gpu[i].layout()) - // .execute(in_gpu[i].base(), fw_gpu[i].base(), multi::cufft::forward); - // } - - // multi::cufft::plan<2>({false, true}, in_gpu.layout(), fw_gpu2.layout()) - // .execute(in_gpu.base(), fw_gpu2.base(), multi::cufft::forward); - - // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).real() == 0.0 ); - // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag() == 0.0 ); - - // BOOST_TEST( (complex(fw_gpu[3][2]) - complex(fw_gpu2[3][2])).real() == 0.0 ); - // BOOST_TEST( (complex(fw_gpu[3][2]) - complex(fw_gpu2[3][2])).imag() == 0.0 ); - // } - // { - // auto fw_cpu = multi::array(extensions(in_cpu)); - // multi::fftw::dft({false, true}, in_cpu, fw_cpu, multi::fftw::forward); - - // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; - // auto const fw_gpu = multi::cufft::dft({false, true}, in_gpu, multi::cufft::forward); - - // BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); - // BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); - - // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).real() == 0.0 ); - // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag() == 0.0 ); - - // BOOST_TEST( (complex(fw_gpu[2][3]) - fw_cpu[2][3]).real() == 0.0 ); - // BOOST_TEST( (complex(fw_gpu[2][3]) - fw_cpu[2][3]).imag() == 0.0 ); - // } - // { - // auto fw_cpu = multi::array(extensions(in_cpu)); - // multi::fftw::dft({true, false}, in_cpu, fw_cpu, multi::fftw::forward); - - // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; - // auto const fw_gpu = multi::cufft::dft({true, false}, in_gpu, multi::cufft::forward); - - // BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); - // BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); - - // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).real() == 0.0 ); - // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag() == 0.0 ); - - // BOOST_TEST( (complex(fw_gpu[2][3]) - fw_cpu[2][3]).real() == 0.0 ); - // BOOST_TEST( (complex(fw_gpu[2][3]) - fw_cpu[2][3]).imag() == 0.0 ); - // } + // { + // auto fw_cpu = multi::array(extensions(in_cpu)); + // multi::fftw::dft({false, true}, in_cpu, fw_cpu, multi::fftw::forward); + + // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + // auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); + + // BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); + // BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); + + // multi::cufft::plan<2>({false, true}, in_gpu.layout(), fw_gpu.layout()) + // .execute(in_gpu.base(), fw_gpu.base(), multi::cufft::forward); + + // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).real() == 0.0 ); + // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag() == 0.0 ); + // } + // { + // auto fw_cpu = multi::array(extensions(in_cpu)); + // multi::fftw::dft({false, true}, in_cpu, fw_cpu, multi::fftw::forward); + + // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + // auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); + + // BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); + // BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); + + // for(int i = 0; i != in_gpu.size(); ++i) { + // multi::cufft::plan<1>({true}, in_gpu[i].layout(), fw_gpu[i].layout()) + // .execute(in_gpu[i].base(), fw_gpu[i].base(), multi::cufft::forward); + // } + + // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).real() == 0.0 ); + // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag() == 0.0 ); + // } + // { + // auto fw_cpu = multi::array(extensions(in_cpu)); + // multi::fftw::dft({false, true}, in_cpu, fw_cpu, multi::fftw::forward); + + // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + // auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); + // auto fw_gpu2 = multi::thrust::cuda::array(extensions(in_gpu)); + // auto fw_gpu3 = multi::thrust::cuda::array(extensions(in_gpu)); + + // BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); + // BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); + + // for(int i = 0; i != in_gpu.size(); ++i) { + // multi::cufft::plan<1>({true}, in_gpu[i].layout(), fw_gpu[i].layout()) + // .execute(in_gpu[i].base(), fw_gpu[i].base(), multi::cufft::forward); + // } + + // multi::cufft::plan<2>({false, true}, in_gpu.layout(), fw_gpu2.layout()) + // .execute(in_gpu.base(), fw_gpu2.base(), multi::cufft::forward); + + // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).real() == 0.0 ); + // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag() == 0.0 ); + + // BOOST_TEST( (complex(fw_gpu[3][2]) - complex(fw_gpu2[3][2])).real() == 0.0 ); + // BOOST_TEST( (complex(fw_gpu[3][2]) - complex(fw_gpu2[3][2])).imag() == 0.0 ); + // } + // { + // auto fw_cpu = multi::array(extensions(in_cpu)); + // multi::fftw::dft({false, true}, in_cpu, fw_cpu, multi::fftw::forward); + + // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + // auto const fw_gpu = multi::cufft::dft({false, true}, in_gpu, multi::cufft::forward); + + // BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); + // BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); + + // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).real() == 0.0 ); + // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag() == 0.0 ); + + // BOOST_TEST( (complex(fw_gpu[2][3]) - fw_cpu[2][3]).real() == 0.0 ); + // BOOST_TEST( (complex(fw_gpu[2][3]) - fw_cpu[2][3]).imag() == 0.0 ); + // } + // { + // auto fw_cpu = multi::array(extensions(in_cpu)); + // multi::fftw::dft({true, false}, in_cpu, fw_cpu, multi::fftw::forward); + + // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + // auto const fw_gpu = multi::cufft::dft({true, false}, in_gpu, multi::cufft::forward); + + // BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); + // BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); + + // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).real() == 0.0 ); + // BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag() == 0.0 ); + + // BOOST_TEST( (complex(fw_gpu[2][3]) - fw_cpu[2][3]).real() == 0.0 ); + // BOOST_TEST( (complex(fw_gpu[2][3]) - fw_cpu[2][3]).imag() == 0.0 ); + // } // } // BOOST_AUTO_TEST_CASE(check_thrust_complex_vs_std_complex, *boost::unit_test::tolerance(0.0001)){ - // multi::array, 1> const s_in = {1.0 + I*2.0, 2.0 + I*3.0, 3.0 + I*4.0}; - // multi::array, 1> const t_in = {1.0 + I*2.0, 2.0 + I*3.0, 3.0 + I*4.0}; + // multi::array, 1> const s_in = {1.0 + I*2.0, 2.0 + I*3.0, 3.0 + I*4.0}; + // multi::array, 1> const t_in = {1.0 + I*2.0, 2.0 + I*3.0, 3.0 + I*4.0}; - // multi::array, 1> s_out(s_in.extensions()); - // multi::array, 1> t_out(t_in.extensions()); + // multi::array, 1> s_out(s_in.extensions()); + // multi::array, 1> t_out(t_in.extensions()); - // multi::fftw::plan::forward({true}, s_in.base(), s_in.layout(), s_out.base(), s_out.layout()).execute(s_in.base(), s_out.base()); - // multi::fftw::plan::forward({true}, t_in.base(), t_in.layout(), t_out.base(), t_out.layout()).execute(t_in.base(), t_out.base()); + // multi::fftw::plan::forward({true}, s_in.base(), s_in.layout(), s_out.base(), s_out.layout()).execute(s_in.base(), s_out.base()); + // multi::fftw::plan::forward({true}, t_in.base(), t_in.layout(), t_out.base(), t_out.layout()).execute(t_in.base(), t_out.base()); - // BOOST_REQUIRE( std::equal(s_out.begin(), s_out.end(), t_out.begin()) ); + // BOOST_REQUIRE( std::equal(s_out.begin(), s_out.end(), t_out.begin()) ); // } // BOOST_AUTO_TEST_CASE(small_1D_cpu_vs_cpu, *boost::unit_test::tolerance(0.0001)){ - // multi::array, 1> const cpu_in = {1.0 + I*2.0, 2.0 + I*3.0, 3.0 + I*4.0}; - // multi::thrust::cuda::array, 1> const gpu_in = {1.0 + I*2.0, 2.0 + I*3.0, 3.0 + I*4.0}; + // multi::array, 1> const cpu_in = {1.0 + I*2.0, 2.0 + I*3.0, 3.0 + I*4.0}; + // multi::thrust::cuda::array, 1> const gpu_in = {1.0 + I*2.0, 2.0 + I*3.0, 3.0 + I*4.0}; - // multi::array, 1> cpu_out(cpu_in.extensions()); - // multi::thrust::cuda::array, 1> gpu_out(gpu_in.extensions()); + // multi::array, 1> cpu_out(cpu_in.extensions()); + // multi::thrust::cuda::array, 1> gpu_out(gpu_in.extensions()); - // multi::fftw::plan::forward({true}, cpu_in.base(), cpu_in.layout(), cpu_out.base(), cpu_out.layout()).execute (cpu_in.base(), cpu_out.base()); - // multi::cufft::plan<1> ({true}, gpu_in.layout(), gpu_out.layout()).execute_forward(gpu_in.base(), gpu_out.base()); + // multi::fftw::plan::forward({true}, cpu_in.base(), cpu_in.layout(), cpu_out.base(), cpu_out.layout()).execute (cpu_in.base(), cpu_out.base()); + // multi::cufft::plan<1> ({true}, gpu_in.layout(), gpu_out.layout()).execute_forward(gpu_in.base(), gpu_out.base()); // } // BOOST_AUTO_TEST_CASE(cufft_1D_combinations, *boost::unit_test::tolerance(0.0001)){ - // using complex = thrust::complex; // this can't be std::complex in the gpu + // using complex = thrust::complex; // this can't be std::complex in the gpu - // auto const in_cpu = std::invoke([]{ - // multi::array ret({128}, complex{}); - // std::default_random_engine generator; - // std::uniform_real_distribution distribution(1.0, 88.0); + // auto const in_cpu = std::invoke([]{ + // multi::array ret({128}, complex{}); + // std::default_random_engine generator; + // std::uniform_real_distribution distribution(1.0, 88.0); - // std::generate( - // reinterpret_cast(ret.data_elements()), - // reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} - // ); - // return ret; - // }); + // std::generate( + // reinterpret_cast(ret.data_elements()), + // reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} + // ); + // return ret; + // }); - // for(auto c : std::vector>{ - // {true} //, - // // {false}, - // }){ - // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + // for(auto c : std::vector>{ + // {true} //, + // // {false}, + // }){ + // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; - // for(auto const idx : extension(in_cpu)) { - // std::cout << "A: " << idx << ": " << in_cpu[idx] << ", " << in_gpu[idx] << std::endl; - // } + // for(auto const idx : extension(in_cpu)) { + // std::cout << "A: " << idx << ": " << in_cpu[idx] << ", " << in_gpu[idx] << std::endl; + // } - // BOOST_TEST( complex(in_gpu[31]).real() == in_cpu[31].real() ); - // BOOST_TEST( complex(in_gpu[31]).imag() == in_cpu[31].imag() ); + // BOOST_TEST( complex(in_gpu[31]).real() == in_cpu[31].real() ); + // BOOST_TEST( complex(in_gpu[31]).imag() == in_cpu[31].imag() ); - // auto fw_cpu = multi::array(extensions(in_cpu)); - // auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); + // auto fw_cpu = multi::array(extensions(in_cpu)); + // auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); - // auto p_cpu = multi::fftw::plan::forward(c, in_cpu.base(), in_cpu.layout(), fw_cpu.base(), fw_cpu.layout()); - // auto p_gpu = multi::cufft::plan<1> (c, in_gpu.layout(), fw_gpu.layout()); + // auto p_cpu = multi::fftw::plan::forward(c, in_cpu.base(), in_cpu.layout(), fw_cpu.base(), fw_cpu.layout()); + // auto p_gpu = multi::cufft::plan<1> (c, in_gpu.layout(), fw_gpu.layout()); - // for(auto const idx : extension(in_cpu)) { - // std::cout << "B: " << idx << ": " << in_cpu[idx] << ", " << in_gpu[idx] << std::endl; - // } + // for(auto const idx : extension(in_cpu)) { + // std::cout << "B: " << idx << ": " << in_cpu[idx] << ", " << in_gpu[idx] << std::endl; + // } - // BOOST_TEST( complex(in_gpu[31]).real() == in_cpu[31].real() ); - // BOOST_TEST( complex(in_gpu[31]).imag() == in_cpu[31].imag() ); + // BOOST_TEST( complex(in_gpu[31]).real() == in_cpu[31].real() ); + // BOOST_TEST( complex(in_gpu[31]).imag() == in_cpu[31].imag() ); - // p_cpu.execute (in_cpu.base(), fw_cpu.base()); - // p_gpu.execute_forward(in_gpu.base(), fw_gpu.base()); + // p_cpu.execute (in_cpu.base(), fw_cpu.base()); + // p_gpu.execute_forward(in_gpu.base(), fw_gpu.base()); - // BOOST_TEST( fw_cpu[31].real() != 0.0 ); - // BOOST_TEST( fw_cpu[31].imag() != 0.0 ); + // BOOST_TEST( fw_cpu[31].real() != 0.0 ); + // BOOST_TEST( fw_cpu[31].imag() != 0.0 ); - // for(auto const idx : extension(in_cpu)) { - // std::cout << "C: " << idx << ": " << in_cpu[idx] << ", " << in_gpu[idx] << std::endl; - // } + // for(auto const idx : extension(in_cpu)) { + // std::cout << "C: " << idx << ": " << in_cpu[idx] << ", " << in_gpu[idx] << std::endl; + // } - // BOOST_TEST( complex(in_gpu[31]).real() == in_cpu[31].real() ); - // BOOST_TEST( complex(in_gpu[31]).imag() == in_cpu[31].imag() ); + // BOOST_TEST( complex(in_gpu[31]).real() == in_cpu[31].real() ); + // BOOST_TEST( complex(in_gpu[31]).imag() == in_cpu[31].imag() ); - // for(auto const idx : extension(in_cpu)) { - // std::cout << idx << ": " << fw_cpu[idx] << ", " << fw_gpu[idx] << std::endl; - // } + // for(auto const idx : extension(in_cpu)) { + // std::cout << idx << ": " << fw_cpu[idx] << ", " << fw_gpu[idx] << std::endl; + // } - // BOOST_TEST( complex(fw_gpu[31]).real() == fw_cpu[31].real() ); - // BOOST_TEST( complex(fw_gpu[31]).imag() == fw_cpu[31].imag() ); - // } + // BOOST_TEST( complex(fw_gpu[31]).real() == fw_cpu[31].real() ); + // BOOST_TEST( complex(fw_gpu[31]).imag() == fw_cpu[31].imag() ); + // } // } // BOOST_AUTO_TEST_CASE(cufft_2D_combinations, *boost::unit_test::tolerance(0.0001)){ - // using complex = thrust::complex; // this can't be std::complex in the gpu - - // auto const in_cpu = std::invoke([]{ - // multi::array ret({10, 20}); - // std::default_random_engine generator; - // std::uniform_real_distribution distribution(-1.0, 1.0); - - // std::generate( - // reinterpret_cast(ret.data_elements()), - // reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} - // ); - // return ret; - // }); - - // for(auto c : std::vector>{ - // {true , true }, - // {true , false}, - // {false, true }//, - // // {false, false} - // }){ - // auto fw_cpu = multi::array(extensions(in_cpu)); - // multi::fftw::dft(c, in_cpu, fw_cpu, multi::fftw::forward); - - // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; - // auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); - - // BOOST_TEST( fw_cpu[2][1].real() != 0.0 ); - // BOOST_TEST( fw_cpu[2][1].imag() != 0.0 ); - - // multi::cufft::plan<2>(c, in_gpu.layout(), fw_gpu.layout()) - // .execute(in_gpu.base(), fw_gpu.base(), multi::cufft::forward); - - // BOOST_TEST( (complex(fw_gpu[2][1]) - fw_cpu[2][1]).real() == 0.0 ); - // BOOST_TEST( (complex(fw_gpu[2][1]) - fw_cpu[2][1]).imag() == 0.0 ); - // } + // using complex = thrust::complex; // this can't be std::complex in the gpu + + // auto const in_cpu = std::invoke([]{ + // multi::array ret({10, 20}); + // std::default_random_engine generator; + // std::uniform_real_distribution distribution(-1.0, 1.0); + + // std::generate( + // reinterpret_cast(ret.data_elements()), + // reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} + // ); + // return ret; + // }); + + // for(auto c : std::vector>{ + // {true , true }, + // {true , false}, + // {false, true }//, + // // {false, false} + // }){ + // auto fw_cpu = multi::array(extensions(in_cpu)); + // multi::fftw::dft(c, in_cpu, fw_cpu, multi::fftw::forward); + + // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + // auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); + + // BOOST_TEST( fw_cpu[2][1].real() != 0.0 ); + // BOOST_TEST( fw_cpu[2][1].imag() != 0.0 ); + + // multi::cufft::plan<2>(c, in_gpu.layout(), fw_gpu.layout()) + // .execute(in_gpu.base(), fw_gpu.base(), multi::cufft::forward); + + // BOOST_TEST( (complex(fw_gpu[2][1]) - fw_cpu[2][1]).real() == 0.0 ); + // BOOST_TEST( (complex(fw_gpu[2][1]) - fw_cpu[2][1]).imag() == 0.0 ); + // } // } // BOOST_AUTO_TEST_CASE(cufft_2D_combinations_inplace, *boost::unit_test::tolerance(0.0001)){ - // using complex = thrust::complex; // this can't be std::complex in the gpu + // using complex = thrust::complex; // this can't be std::complex in the gpu - // auto const in_cpu = std::invoke([]{ - // multi::array ret({10, 20}); - // std::default_random_engine generator; - // std::uniform_real_distribution distribution(-1.0, 1.0); + // auto const in_cpu = std::invoke([]{ + // multi::array ret({10, 20}); + // std::default_random_engine generator; + // std::uniform_real_distribution distribution(-1.0, 1.0); - // std::generate( - // reinterpret_cast(ret.data_elements()), - // reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} - // ); - // return ret; - // }); + // std::generate( + // reinterpret_cast(ret.data_elements()), + // reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} + // ); + // return ret; + // }); - // for(auto c : std::vector>{ - // {true , true }, - // {true , false}, - // {false, true }//, - // // {false, false} - // }){ - // auto fw_cpu = in_cpu; - // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + // for(auto c : std::vector>{ + // {true , true }, + // {true , false}, + // {false, true }//, + // // {false, false} + // }){ + // auto fw_cpu = in_cpu; + // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; - // multi::fftw::dft(c, fw_cpu, multi::fftw::forward); + // multi::fftw::dft(c, fw_cpu, multi::fftw::forward); - // auto fw_gpu = in_gpu; + // auto fw_gpu = in_gpu; - // BOOST_TEST( fw_cpu[2][1].real() != 0.0 ); - // BOOST_TEST( fw_cpu[2][1].imag() != 0.0 ); + // BOOST_TEST( fw_cpu[2][1].real() != 0.0 ); + // BOOST_TEST( fw_cpu[2][1].imag() != 0.0 ); - // multi::cufft::plan<2>(c, fw_gpu.layout(), fw_gpu.layout()) - // .execute(fw_gpu.base(), fw_gpu.base(), multi::cufft::forward); + // multi::cufft::plan<2>(c, fw_gpu.layout(), fw_gpu.layout()) + // .execute(fw_gpu.base(), fw_gpu.base(), multi::cufft::forward); - // BOOST_TEST( (complex(fw_gpu[2][1]) - fw_cpu[2][1]).real() == 0.0 ); - // BOOST_TEST( (complex(fw_gpu[2][1]) - fw_cpu[2][1]).imag() == 0.0 ); - // } + // BOOST_TEST( (complex(fw_gpu[2][1]) - fw_cpu[2][1]).real() == 0.0 ); + // BOOST_TEST( (complex(fw_gpu[2][1]) - fw_cpu[2][1]).imag() == 0.0 ); + // } // } // BOOST_AUTO_TEST_CASE(cufft_3D, *boost::unit_test::tolerance(0.0001)){ - // using complex = thrust::complex; // this can't be std::complex in the gpu - - // auto const in_cpu = std::invoke([]{ - // multi::array ret({10, 20, 30}); - // std::default_random_engine generator; - // std::uniform_real_distribution distribution(-1.0, 1.0); - - // std::generate( - // reinterpret_cast(ret.data_elements()), - // reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} - // ); - // return ret; - // }); - - // for(auto c : std::vector>{ - // {true , true , true }, - // {true , true , false}, - // {true , false, true }, - // {true , false, false}, - // {false, true , true }, - // {false, true , false}, - // {false, false, true }//, - // // {false, false, false} - // }){ - // auto fw_cpu = multi::array(extensions(in_cpu)); - // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; - - // multi::fftw::dft(c, in_cpu, fw_cpu, multi::fftw::forward); - // auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); - - // multi::cufft::dft(c, in_gpu, fw_gpu, multi::cufft::forward); - - // BOOST_TEST( fw_cpu[3][2][1].real() != 0.0 ); - // BOOST_TEST( fw_cpu[3][2][1].imag() != 0.0 ); - - // BOOST_TEST( (complex(fw_gpu[3][2][1]) - fw_cpu[3][2][1]).real() == 0.0 ); - // BOOST_TEST( (complex(fw_gpu[3][2][1]) - fw_cpu[3][2][1]).imag() == 0.0 ); - // } + // using complex = thrust::complex; // this can't be std::complex in the gpu + + // auto const in_cpu = std::invoke([]{ + // multi::array ret({10, 20, 30}); + // std::default_random_engine generator; + // std::uniform_real_distribution distribution(-1.0, 1.0); + + // std::generate( + // reinterpret_cast(ret.data_elements()), + // reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} + // ); + // return ret; + // }); + + // for(auto c : std::vector>{ + // {true , true , true }, + // {true , true , false}, + // {true , false, true }, + // {true , false, false}, + // {false, true , true }, + // {false, true , false}, + // {false, false, true }//, + // // {false, false, false} + // }){ + // auto fw_cpu = multi::array(extensions(in_cpu)); + // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + + // multi::fftw::dft(c, in_cpu, fw_cpu, multi::fftw::forward); + // auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); + + // multi::cufft::dft(c, in_gpu, fw_gpu, multi::cufft::forward); + + // BOOST_TEST( fw_cpu[3][2][1].real() != 0.0 ); + // BOOST_TEST( fw_cpu[3][2][1].imag() != 0.0 ); + + // BOOST_TEST( (complex(fw_gpu[3][2][1]) - fw_cpu[3][2][1]).real() == 0.0 ); + // BOOST_TEST( (complex(fw_gpu[3][2][1]) - fw_cpu[3][2][1]).imag() == 0.0 ); + // } // } // BOOST_AUTO_TEST_CASE(cufft_3D_inplace, *boost::unit_test::tolerance(0.0001)){ - // using complex = thrust::complex; // this can't be std::complex in the gpu - - // auto const in_cpu = std::invoke([]{ - // multi::array ret({10, 20, 30}); - // std::default_random_engine generator; - // std::uniform_real_distribution distribution(-1.0, 1.0); - - // std::generate( - // reinterpret_cast(ret.data_elements()), - // reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} - // ); - // return ret; - // }); - - // for(auto c : std::vector>{ - // {true , true , true }, - // {true , true , false}, - // {true , false, true }, - // {true , false, false}, - // {false, true , true }, - // {false, true , false}, - // {false, false, true }//, - // // {false, false, false} - // }){ - // auto fw_cpu = in_cpu; - // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; - - // multi::fftw::dft(c, fw_cpu, multi::fftw::forward); - // auto fw_gpu = in_gpu; - - // multi::cufft::plan<3>(c, fw_gpu.layout(), fw_gpu.layout()) - // .execute(fw_gpu.base(), fw_gpu.base(), multi::cufft::forward); - - // BOOST_TEST( fw_cpu[3][2][1].real() != 0.0 ); - // BOOST_TEST( fw_cpu[3][2][1].imag() != 0.0 ); - - // std::cerr << "case " << c[0] << " " << c[1] << " " << c[2] << std::endl; - - // BOOST_TEST( (complex(fw_gpu[3][2][1]) - fw_cpu[3][2][1]).real() == 0.0 ); - // BOOST_TEST( (complex(fw_gpu[3][2][1]) - fw_cpu[3][2][1]).imag() == 0.0 ); - // } + // using complex = thrust::complex; // this can't be std::complex in the gpu + + // auto const in_cpu = std::invoke([]{ + // multi::array ret({10, 20, 30}); + // std::default_random_engine generator; + // std::uniform_real_distribution distribution(-1.0, 1.0); + + // std::generate( + // reinterpret_cast(ret.data_elements()), + // reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} + // ); + // return ret; + // }); + + // for(auto c : std::vector>{ + // {true , true , true }, + // {true , true , false}, + // {true , false, true }, + // {true , false, false}, + // {false, true , true }, + // {false, true , false}, + // {false, false, true }//, + // // {false, false, false} + // }){ + // auto fw_cpu = in_cpu; + // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + + // multi::fftw::dft(c, fw_cpu, multi::fftw::forward); + // auto fw_gpu = in_gpu; + + // multi::cufft::plan<3>(c, fw_gpu.layout(), fw_gpu.layout()) + // .execute(fw_gpu.base(), fw_gpu.base(), multi::cufft::forward); + + // BOOST_TEST( fw_cpu[3][2][1].real() != 0.0 ); + // BOOST_TEST( fw_cpu[3][2][1].imag() != 0.0 ); + + // std::cerr << "case " << c[0] << " " << c[1] << " " << c[2] << std::endl; + + // BOOST_TEST( (complex(fw_gpu[3][2][1]) - fw_cpu[3][2][1]).real() == 0.0 ); + // BOOST_TEST( (complex(fw_gpu[3][2][1]) - fw_cpu[3][2][1]).imag() == 0.0 ); + // } // } // BOOST_AUTO_TEST_CASE(cufft_4D, *boost::unit_test::tolerance(0.0001)){ - // using complex = thrust::complex; // this can't be std::complex in the gpu - - // auto const in_cpu = std::invoke([]{ - // multi::array ret({10, 20, 30, 40}); - // std::default_random_engine generator; - // std::uniform_real_distribution distribution(-1.0, 1.0); - - // std::generate( - // reinterpret_cast(ret.data_elements()), - // reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} - // ); - // return ret; - // }); - - // for(auto c : std::vector>{ - // // {true , true , true , true }, - // {true , true , true , false}, - // {true , true , false, true }, - // {true , true , false, false}, - // {true , false, true , true }, - // {true , false, true , false}, - // {true , false, false, true }, - // {true , false, false, false}, - // {false, true , true , true }, - // {false, true , true , false}, - // {false, true , false, true }, - // {false, true , false, false}, - // {false, false, true , true }, - // {false, false, true , false}, - // {false, false, false, true }//, - // // {false, false, false, false} - // }){ - // auto fw_cpu = multi::array(extensions(in_cpu)); - // multi::fftw::dft(c, in_cpu, fw_cpu, multi::fftw::forward); - - // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; - // auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); - - // BOOST_TEST( fw_cpu[4][3][2][1].real() != 0.0 ); - // BOOST_TEST( fw_cpu[4][3][2][1].imag() != 0.0 ); - - // multi::cufft::plan<4>(c, in_gpu.layout(), fw_gpu.layout()) - // .execute(in_gpu.base(), fw_gpu.base(), multi::cufft::forward); - - // BOOST_TEST( (complex(fw_gpu[4][3][2][1]) - fw_cpu[4][3][2][1]).real() == 0.0 ); - // BOOST_TEST( (complex(fw_gpu[4][3][2][1]) - fw_cpu[4][3][2][1]).imag() == 0.0 ); - // } + // using complex = thrust::complex; // this can't be std::complex in the gpu + + // auto const in_cpu = std::invoke([]{ + // multi::array ret({10, 20, 30, 40}); + // std::default_random_engine generator; + // std::uniform_real_distribution distribution(-1.0, 1.0); + + // std::generate( + // reinterpret_cast(ret.data_elements()), + // reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} + // ); + // return ret; + // }); + + // for(auto c : std::vector>{ + // // {true , true , true , true }, + // {true , true , true , false}, + // {true , true , false, true }, + // {true , true , false, false}, + // {true , false, true , true }, + // {true , false, true , false}, + // {true , false, false, true }, + // {true , false, false, false}, + // {false, true , true , true }, + // {false, true , true , false}, + // {false, true , false, true }, + // {false, true , false, false}, + // {false, false, true , true }, + // {false, false, true , false}, + // {false, false, false, true }//, + // // {false, false, false, false} + // }){ + // auto fw_cpu = multi::array(extensions(in_cpu)); + // multi::fftw::dft(c, in_cpu, fw_cpu, multi::fftw::forward); + + // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + // auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); + + // BOOST_TEST( fw_cpu[4][3][2][1].real() != 0.0 ); + // BOOST_TEST( fw_cpu[4][3][2][1].imag() != 0.0 ); + + // multi::cufft::plan<4>(c, in_gpu.layout(), fw_gpu.layout()) + // .execute(in_gpu.base(), fw_gpu.base(), multi::cufft::forward); + + // BOOST_TEST( (complex(fw_gpu[4][3][2][1]) - fw_cpu[4][3][2][1]).real() == 0.0 ); + // BOOST_TEST( (complex(fw_gpu[4][3][2][1]) - fw_cpu[4][3][2][1]).imag() == 0.0 ); + // } // } // BOOST_AUTO_TEST_CASE(cufft_3D_timing, *boost::unit_test::tolerance(0.0001)){ - // auto x = multi::extensions_t<3>{300, 300, 300}; - // { - // auto const in_cpu = multi::array(x, 10.0); - // BOOST_ASSERT( in_cpu.num_elements()*sizeof(complex) < 2e9 ); - // auto fw_cpu = multi::array(extensions(in_cpu), 99.0); - // { - // // boost::timer::auto_cpu_timer t; // 1.041691s wall, 1.030000s user + 0.000000s system = 1.030000s CPU (98.9%) - // multi::fftw::dft_forward({true, true}, in_cpu, fw_cpu); - // BOOST_TEST( fw_cpu[8][9][10] != 99.0 ); - // } - - // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; // (x, 10.0); - // cudaDeviceSynchronize()==cudaSuccess?void():assert(0); - // { - // auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu), 99.0); - // cudaDeviceSynchronize()==cudaSuccess?void():assert(0); - // // boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%) - // boost::multi::cufft::dft({true, true}, in_gpu, fw_gpu, multi::cufft::forward); - // cudaDeviceSynchronize()==cudaSuccess?void():assert(0); - // BOOST_TEST( (static_cast(fw_gpu[8][9][10]) - fw_cpu[8][9][10]).real() == 0.0 ); - // BOOST_TEST( (static_cast(fw_gpu[8][9][10]) - fw_cpu[8][9][10]).imag() == 0.0 ); - // } - // { - // // boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%) - // auto const fw_gpu2 = boost::multi::cufft::dft({true, true}, in_gpu, multi::cufft::forward); - // cudaDeviceSynchronize()==cudaSuccess?void():assert(0); - // BOOST_TEST( (static_cast(fw_gpu2[8][9][10]) - fw_cpu[8][9][10]).real() == 0.0 ); - // BOOST_TEST( (static_cast(fw_gpu2[8][9][10]) - fw_cpu[8][9][10]).imag() == 0.0 ); - // } - // } + // auto x = multi::extensions_t<3>{300, 300, 300}; + // { + // auto const in_cpu = multi::array(x, 10.0); + // BOOST_ASSERT( in_cpu.num_elements()*sizeof(complex) < 2e9 ); + // auto fw_cpu = multi::array(extensions(in_cpu), 99.0); + // { + // // boost::timer::auto_cpu_timer t; // 1.041691s wall, 1.030000s user + 0.000000s system = 1.030000s CPU (98.9%) + // multi::fftw::dft_forward({true, true}, in_cpu, fw_cpu); + // BOOST_TEST( fw_cpu[8][9][10] != 99.0 ); + // } + + // auto const in_gpu = multi::thrust::cuda::array{in_cpu}; // (x, 10.0); + // cudaDeviceSynchronize()==cudaSuccess?void():assert(0); + // { + // auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu), 99.0); + // cudaDeviceSynchronize()==cudaSuccess?void():assert(0); + // // boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%) + // boost::multi::cufft::dft({true, true}, in_gpu, fw_gpu, multi::cufft::forward); + // cudaDeviceSynchronize()==cudaSuccess?void():assert(0); + // BOOST_TEST( (static_cast(fw_gpu[8][9][10]) - fw_cpu[8][9][10]).real() == 0.0 ); + // BOOST_TEST( (static_cast(fw_gpu[8][9][10]) - fw_cpu[8][9][10]).imag() == 0.0 ); + // } + // { + // // boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%) + // auto const fw_gpu2 = boost::multi::cufft::dft({true, true}, in_gpu, multi::cufft::forward); + // cudaDeviceSynchronize()==cudaSuccess?void():assert(0); + // BOOST_TEST( (static_cast(fw_gpu2[8][9][10]) - fw_cpu[8][9][10]).real() == 0.0 ); + // BOOST_TEST( (static_cast(fw_gpu2[8][9][10]) - fw_cpu[8][9][10]).imag() == 0.0 ); + // } + // } // #if 1 - // { - // multi::thrust::cuda::universal_array const in_gpu(x, 10.); - // multi::thrust::cuda::universal_array fw_gpu(extensions(in_gpu), 99.); - - // // multi::cuda::managed::array const in_gpu(x, 10.); - // // multi::cuda::managed::array fw_gpu(extensions(in_gpu), 99.); - // { - // // boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%) - // multi::cufft::dft({true, true}, in_gpu, fw_gpu, multi::cufft::forward); - // // BOOST_TEST( fw_gpu[8][9][10].operator complex() != 99. ); - // } - // { - // // boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%) - // multi::cufft::dft({true, true}, in_gpu, fw_gpu, multi::cufft::forward); - // // BOOST_TEST( fw_gpu[8][9][10].operator complex() != 99. ); - // } - // } + // { + // multi::thrust::cuda::universal_array const in_gpu(x, 10.); + // multi::thrust::cuda::universal_array fw_gpu(extensions(in_gpu), 99.); + + // // multi::cuda::managed::array const in_gpu(x, 10.); + // // multi::cuda::managed::array fw_gpu(extensions(in_gpu), 99.); + // { + // // boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%) + // multi::cufft::dft({true, true}, in_gpu, fw_gpu, multi::cufft::forward); + // // BOOST_TEST( fw_gpu[8][9][10].operator complex() != 99. ); + // } + // { + // // boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%) + // multi::cufft::dft({true, true}, in_gpu, fw_gpu, multi::cufft::forward); + // // BOOST_TEST( fw_gpu[8][9][10].operator complex() != 99. ); + // } + // } // #endif // } @@ -570,151 +581,151 @@ auto main() -> int { // BOOST_AUTO_TEST_CASE(cufft_combinations, *utf::tolerance(0.00001)){ - // auto const in = []{ - // multi::array ret({32, 90, 98, 96}); - // std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), - // [](){return complex{std::rand()*1./RAND_MAX, std::rand()*1./RAND_MAX};} - // ); - // return ret; - // }(); - // std::clog<<"memory size "<< in.num_elements()*sizeof(complex)/1e6 <<" MB\n"; - - // multi::thrust::cuda::universal_array const in_gpu = in; - // multi::thrust::cuda::universal_array const in_mng = in; - - // using std::clog; - // for(auto c : std::vector>{ - // {false, true , true , true }, - // {false, true , true , false}, - // {true , false, false, false}, - // {true , true , false, false}, - // {false, false, true , false}, - // {false, false, false, false}, - // }){ - // std::clog<<"case "; copy(begin(c), end(c), std::ostream_iterator{std::clog,", "}); std::clog< out = in; - // multi::array in_rw = in; - // [&, _ = watch{"cpu_opl "}]{ - // multi::fftw::dft_forward(c, in, out); - // }(); - // [&, _ = watch{"cpu_ipl "}]{ - // multi::fftw::dft(c, in_rw, multi::fftw::forward); - // // BOOST_TEST( abs( static_cast>(in_rw[5][4][3][1]) - multi::complex(out[5][4][3][1]) ) == 0. ); - // }(); - // { - // multi::array in_rw2 = in; - // [&, _ = watch{"cpu_mov "}]{ - // multi::array const out_mov = multi::fftw::dft_forward(c, std::move(in_rw2)); - // // what(out_mov); - // // BOOST_TEST( abs( static_cast>(out_mov[5][4][3][1]) - multi::complex(out[5][4][3][1]) ) == 0. ); - // BOOST_REQUIRE( is_empty(in_rw2) ); - // BOOST_REQUIRE( extensions(out_mov) == extensions(in) ); - // }(); - // } - - // [&, _ = watch{"cpu_new "}]{ - // auto const out_cpy = multi::fftw::dft_forward(c, in); - // BOOST_TEST( abs( static_cast>(out_cpy[5][4][3][1]) - std::complex(out[5][4][3][1]) ) == 0. ); - // }(); - // multi::thrust::cuda::array out_gpu(extensions(in_gpu)); - // [&, _ = watch{"gpu_opl "}]{ - // multi::cufft::dft(c, in_gpu , out_gpu, multi::cufft::forward); - // BOOST_TEST( abs( static_cast(out_gpu[5][4][3][1]) - out[5][4][3][1] ) == 0. ); - // }(); - // { - // multi::thrust::cuda::array in_rw_gpu = in_gpu; - // [&, _ = watch{"gpu_ipl "}]{ - // multi::cufft::dft(c, in_rw_gpu, multi::cufft::forward); - // BOOST_TEST( abs( static_cast(in_rw_gpu[5][4][3][1]) - out[5][4][3][1] ) == 0. ); - // }(); - // } - // { - // multi::thrust::cuda::array in_rw_gpu = in_gpu; - // [&, _ = watch{"gpu_mov "}]{ - // multi::thrust::cuda::array const out_mov = multi::cufft::dft_forward(c, std::move(in_rw_gpu)); - // // BOOST_REQUIRE( in_rw_gpu.empty() ); - // // BOOST_TEST( abs( static_cast(out_mov[5][4][3][1]) - out[5][4][3][1] ) == 0. ); - // }(); - // } - // { - // multi::thrust::cuda::array in_rw_gpu = in_gpu; - // [&, _ = watch{"gpu_mov "}]{ - // multi::thrust::cuda::array out_mov = std::move(in_rw_gpu); - // multi::cufft::dft(c, out_mov, multi::cufft::forward); - // // BOOST_REQUIRE( in_rw_gpu.empty() ); - // // BOOST_TEST( abs( static_cast(out_mov[5][4][3][1]) - out[5][4][3][1] ) == 0. ); - // }(); - // } - // cudaDeviceSynchronize(); - // [&, _ = watch{"gpu_new "}]{ - // multi::thrust::cuda::array const out_cpy = multi::cufft::dft(c, in_gpu, multi::cufft::forward); - // }(); - // multi::thrust::cuda::universal_array out_mng(extensions(in_mng)); - // [&, _ = watch{"mng_cld "}]{ - // multi::cufft::dft(c, in_mng, out_mng, multi::cufft::forward); - // BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. ); - // }(); - // [&, _ = watch{"mng_hot "}]{ - // multi::cufft::dft(c, in_mng , out_mng, multi::cufft::forward); - // BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. ); - // }(); - // [&, _ = watch{"mng_new "}]{ - // auto const out_mng = multi::cufft::dft(c, in_mng, multi::cufft::forward); - // BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. ); - // }(); - // } - // // std::clog<<"cache size " - // // << multi::cufft::plan::cache<1>().size() <<' ' - // // << multi::cufft::plan::cache<2>().size() <<' ' - // // << multi::cufft::plan::cache<3>().size() <<' ' - // // << multi::cufft::plan::cache<4>().size() <<' ' - // // < ret({32, 90, 98, 96}); + // std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), + // [](){return complex{std::rand()*1./RAND_MAX, std::rand()*1./RAND_MAX};} + // ); + // return ret; + // }(); + // std::clog<<"memory size "<< in.num_elements()*sizeof(complex)/1e6 <<" MB\n"; + + // multi::thrust::cuda::universal_array const in_gpu = in; + // multi::thrust::cuda::universal_array const in_mng = in; + + // using std::clog; + // for(auto c : std::vector>{ + // {false, true , true , true }, + // {false, true , true , false}, + // {true , false, false, false}, + // {true , true , false, false}, + // {false, false, true , false}, + // {false, false, false, false}, + // }){ + // std::clog<<"case "; copy(begin(c), end(c), std::ostream_iterator{std::clog,", "}); std::clog< out = in; + // multi::array in_rw = in; + // [&, _ = watch{"cpu_opl "}]{ + // multi::fftw::dft_forward(c, in, out); + // }(); + // [&, _ = watch{"cpu_ipl "}]{ + // multi::fftw::dft(c, in_rw, multi::fftw::forward); + // // BOOST_TEST( abs( static_cast>(in_rw[5][4][3][1]) - multi::complex(out[5][4][3][1]) ) == 0. ); + // }(); + // { + // multi::array in_rw2 = in; + // [&, _ = watch{"cpu_mov "}]{ + // multi::array const out_mov = multi::fftw::dft_forward(c, std::move(in_rw2)); + // // what(out_mov); + // // BOOST_TEST( abs( static_cast>(out_mov[5][4][3][1]) - multi::complex(out[5][4][3][1]) ) == 0. ); + // BOOST_REQUIRE( is_empty(in_rw2) ); + // BOOST_REQUIRE( extensions(out_mov) == extensions(in) ); + // }(); + // } + + // [&, _ = watch{"cpu_new "}]{ + // auto const out_cpy = multi::fftw::dft_forward(c, in); + // BOOST_TEST( abs( static_cast>(out_cpy[5][4][3][1]) - std::complex(out[5][4][3][1]) ) == 0. ); + // }(); + // multi::thrust::cuda::array out_gpu(extensions(in_gpu)); + // [&, _ = watch{"gpu_opl "}]{ + // multi::cufft::dft(c, in_gpu , out_gpu, multi::cufft::forward); + // BOOST_TEST( abs( static_cast(out_gpu[5][4][3][1]) - out[5][4][3][1] ) == 0. ); + // }(); + // { + // multi::thrust::cuda::array in_rw_gpu = in_gpu; + // [&, _ = watch{"gpu_ipl "}]{ + // multi::cufft::dft(c, in_rw_gpu, multi::cufft::forward); + // BOOST_TEST( abs( static_cast(in_rw_gpu[5][4][3][1]) - out[5][4][3][1] ) == 0. ); + // }(); + // } + // { + // multi::thrust::cuda::array in_rw_gpu = in_gpu; + // [&, _ = watch{"gpu_mov "}]{ + // multi::thrust::cuda::array const out_mov = multi::cufft::dft_forward(c, std::move(in_rw_gpu)); + // // BOOST_REQUIRE( in_rw_gpu.empty() ); + // // BOOST_TEST( abs( static_cast(out_mov[5][4][3][1]) - out[5][4][3][1] ) == 0. ); + // }(); + // } + // { + // multi::thrust::cuda::array in_rw_gpu = in_gpu; + // [&, _ = watch{"gpu_mov "}]{ + // multi::thrust::cuda::array out_mov = std::move(in_rw_gpu); + // multi::cufft::dft(c, out_mov, multi::cufft::forward); + // // BOOST_REQUIRE( in_rw_gpu.empty() ); + // // BOOST_TEST( abs( static_cast(out_mov[5][4][3][1]) - out[5][4][3][1] ) == 0. ); + // }(); + // } + // cudaDeviceSynchronize(); + // [&, _ = watch{"gpu_new "}]{ + // multi::thrust::cuda::array const out_cpy = multi::cufft::dft(c, in_gpu, multi::cufft::forward); + // }(); + // multi::thrust::cuda::universal_array out_mng(extensions(in_mng)); + // [&, _ = watch{"mng_cld "}]{ + // multi::cufft::dft(c, in_mng, out_mng, multi::cufft::forward); + // BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. ); + // }(); + // [&, _ = watch{"mng_hot "}]{ + // multi::cufft::dft(c, in_mng , out_mng, multi::cufft::forward); + // BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. ); + // }(); + // [&, _ = watch{"mng_new "}]{ + // auto const out_mng = multi::cufft::dft(c, in_mng, multi::cufft::forward); + // BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. ); + // }(); + // } + // // std::clog<<"cache size " + // // << multi::cufft::plan::cache<1>().size() <<' ' + // // << multi::cufft::plan::cache<2>().size() <<' ' + // // << multi::cufft::plan::cache<3>().size() <<' ' + // // << multi::cufft::plan::cache<4>().size() <<' ' + // // < ret({45, 18, 32, 16}); - // std::generate( - // ret.data_elements(), ret.data_elements() + ret.num_elements(), - // [](){return complex{std::rand()*1./RAND_MAX, std::rand()*1./RAND_MAX};} - // ); - // return ret; - // }(); + // auto const in_cpu = []{ + // multi::array ret({45, 18, 32, 16}); + // std::generate( + // ret.data_elements(), ret.data_elements() + ret.num_elements(), + // [](){return complex{std::rand()*1./RAND_MAX, std::rand()*1./RAND_MAX};} + // ); + // return ret; + // }(); - // multi::thrust::cuda::array const in = in_cpu; - // multi::thrust::cuda::array out(extensions(in)); + // multi::thrust::cuda::array const in = in_cpu; + // multi::thrust::cuda::array out(extensions(in)); // #if 0 - // multi::cufft::many_dft(begin(unrotated(in)), end(unrotated(in)), begin(unrotated(out)), +1); + // multi::cufft::many_dft(begin(unrotated(in)), end(unrotated(in)), begin(unrotated(out)), +1); - // multi::array out_cpu(extensions(in)); - // multi::fft::many_dft(begin(unrotated(in_cpu)), end(unrotated(in_cpu)), begin(unrotated(out_cpu)), +1); + // multi::array out_cpu(extensions(in)); + // multi::fft::many_dft(begin(unrotated(in_cpu)), end(unrotated(in_cpu)), begin(unrotated(out_cpu)), +1); - // BOOST_TEST( imag( static_cast(out[5][4][3][2]) - out_cpu[5][4][3][2]) == 0. ); + // BOOST_TEST( imag( static_cast(out[5][4][3][2]) - out_cpu[5][4][3][2]) == 0. ); // #endif // } // #if 0 // BOOST_AUTO_TEST_CASE(cufft_4D, *utf::tolerance(0.00001) ){ - // auto const in = []{ - // multi::array ret({10, 10, 10}); - // std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), - // [](){return complex{std::rand()*1./RAND_MAX, std::rand()*1./RAND_MAX};} - // ); - // return ret; - // }(); - - // multi::array out(extensions(in)); + // auto const in = []{ + // multi::array ret({10, 10, 10}); + // std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), + // [](){return complex{std::rand()*1./RAND_MAX, std::rand()*1./RAND_MAX};} + // ); + // return ret; + // }(); + + // multi::array out(extensions(in)); // // multi::fftw::dft({true, false, true}, in, out, multi::fftw::forward); - // multi::fftw::many_dft(begin(in.rotated()), end(in.rotated()), begin(out.rotated()), multi::fftw::forward); + // multi::fftw::many_dft(begin(in.rotated()), end(in.rotated()), begin(out.rotated()), multi::fftw::forward); - // multi::thrust::cuda::array in_gpu = in; - // multi::thrust::cuda::array out_gpu(extensions(in)); + // multi::thrust::cuda::array in_gpu = in; + // multi::thrust::cuda::array out_gpu(extensions(in)); // // multi::cufft::dft({true, false, true}, in_gpu, out_gpu, multi::fft::forward);//multi::cufft::forward); - // // multi::cufft::many_dft(begin(in_gpu.rotated()), end(in_gpu.rotated()), begin( out_gpu.rotated() ), multi::fftw::forward); - // // BOOST_TEST( ( static_cast(out_gpu[5][4][3]) - out[5][4][3]).imag() == 0. ); + // // multi::cufft::many_dft(begin(in_gpu.rotated()), end(in_gpu.rotated()), begin( out_gpu.rotated() ), multi::fftw::forward); + // // BOOST_TEST( ( static_cast(out_gpu[5][4][3]) - out[5][4][3]).imag() == 0. ); // } // #endif // #endif diff --git a/include/boost/multi/adaptors/fft.hpp b/include/boost/multi/adaptors/fft.hpp index b87f86803..8c513b85b 100644 --- a/include/boost/multi/adaptors/fft.hpp +++ b/include/boost/multi/adaptors/fft.hpp @@ -11,33 +11,195 @@ #include "../adaptors/hipfft.hpp" #endif -#define BOOST_MULTI_DECLRETURN_(ExpR) -> decltype(ExpR) {return ExpR;} // NOLINT(cppcoreguidelines-macro-usage) saves a lot of typing +#define BOOST_MULTI_DECLRETURN_(ExpR) -> decltype(ExpR) { return ExpR; } // NOLINT(cppcoreguidelines-macro-usage) saves a lot of typing +#define BOOST_MULTI_JUSTRETURN_(ExpR) -> decltype(auto) { return ExpR; } // NOLINT(cppcoreguidelines-macro-usage) saves a lot of typing namespace boost::multi::fft{ - static inline constexpr int forward = static_cast(fftw::forward); - static inline constexpr int none = static_cast(fftw::none); - static inline constexpr int backward = static_cast(fftw::backward); + static inline int const forward = static_cast(fftw::forward); + static inline int const none = static_cast(fftw::none); + static inline int const backward = static_cast(fftw::backward); - static_assert( forward != none && none != backward && backward != forward ); + // static_assert( forward != none && none != backward && backward != forward ); template struct priority : std::conditional_t>{}; - template auto dft_aux(priority<0> /*unused*/, Args&&... args) BOOST_MULTI_DECLRETURN_( fftw::dft_backward(std::forward(args)...)) - template auto dft_aux(priority<1> /*unused*/, Args&&... args) BOOST_MULTI_DECLRETURN_(::boost::multi::cufft ::dft_backward(std::forward(args)...)) - template auto dft(Args&&... args) BOOST_MULTI_DECLRETURN_(dft_backward_aux_(priority<1>{}, std::forward(args)...)) - template auto dft(std::array::dimensionality> which, In const& in, Args&&... args) -> decltype(auto) {return dft_aux(priority<1>{}, which, in, std::forward(args)...);} + template auto dft_aux(priority<0> /*unused*/, Args&&... args) BOOST_MULTI_DECLRETURN_( fftw::dft(std::forward(args)...)) + #if defined(__CUDA__) || defined(__NVCC__) + template auto dft_aux(priority<1> /*unused*/, Args&&... args) BOOST_MULTI_DECLRETURN_(::boost::multi::cufft ::dft(std::forward(args)...)) + #endif + template< class... Args> auto dft(Args&&... args) BOOST_MULTI_DECLRETURN_(dft_aux_(priority<1>{}, std::forward(args)...)) + + template auto dft(std::array::dimensionality> which, In&& in, Args&&... args) BOOST_MULTI_DECLRETURN_(dft_aux(priority<1>{}, which, std::forward(in), std::forward(args)...)) template auto dft_forward_aux(priority<0> /*unused*/, Args&&... args) BOOST_MULTI_DECLRETURN_( fftw::dft_forward(std::forward(args)...)) + #if defined(__CUDA__) || defined(__NVCC__) template auto dft_forward_aux(priority<1> /*unused*/, Args&&... args) BOOST_MULTI_DECLRETURN_(cufft ::dft_forward(std::forward(args)...)) - template auto dft_forward(std::array which, In const& in, Args&&... args) -> decltype(auto) {return dft_forward_aux(priority<1>{}, which, in, std::forward(args)...);} + #endif + template auto dft_forward(std::array::dimensionality> which, In&& in, Args&&... args) BOOST_MULTI_DECLRETURN_(dft_forward_aux(priority<1>{}, which, std::forward(in), std::forward(args)...)) template auto dft_backward_aux(priority<0> /*unused*/, Args&&... args) BOOST_MULTI_DECLRETURN_( fftw::dft_backward(std::forward(args)...)) + #if defined(__CUDA__) || defined(__NVCC__) template auto dft_backward_aux(priority<1> /*unused*/, Args&&... args) BOOST_MULTI_DECLRETURN_(cufft ::dft_backward(std::forward(args)...)) + #endif template auto dft_backward(std::array which, In const& in, Args&&... args) -> decltype(auto) {return dft_backward_aux(priority<1>{}, which, in, std::forward(args)...);} + template + class dft_range { + public: + static constexpr auto dimensionality = std::decay_t::dimensionality; + + private: + std::array which_; + In in_; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) + Direction dir_; + + struct const_iterator : private std::decay_t::const_iterator { + // static constexpr auto dimensionality = In::const_iterator::dimensionality; + + private: + bool do_; + std::array sub_which_; + Direction dir_; + + public: + const_iterator( + typename std::decay_t::const_iterator it, + bool doo, std::array::dimensionality - 1> sub_which, + Direction dir + ) : std::decay_t::const_iterator{it}, do_{doo}, sub_which_{sub_which}, dir_{dir} {} + + using typename std::decay_t::const_iterator::difference_type; + using typename std::decay_t::const_iterator::value_type; + using pointer = void*; + using reference = dft_range::const_iterator::reference, Direction>; + using iterator_category = std::random_access_iterator_tag; + + auto operator+(difference_type n) const { return const_iterator{static_cast::const_iterator const&>(*this) + n, do_, sub_which_, dir_}; } + friend auto operator-(const_iterator const& lhs, const_iterator const& rhs) { + return static_cast::const_iterator const&>(lhs) - static_cast::const_iterator const&>(rhs); + } + + auto operator*() const { + class fake_array { + multi::extensions_t extensions_; + public: + explicit fake_array(multi::extensions_t ext) : extensions_{ext} {} + auto extensions() const {return extensions_;} + // multi::size_t size_; + auto extension() const { using std::get; return get<0>(extensions()); } + auto size() const { return extension().size(); } + } fa{(*static_cast::const_iterator const&>(*this)).extensions()}; + return fa; + } + + private: + template + auto copy_(const_iterator const& last, It const& first_d) const -> decltype(auto) { + auto const count = last - *this; + dft( + std::apply([doo = do_](auto... es) { return std::array{doo, es...}; }, sub_which_), + multi::const_subarray::const_iterator::element, dimensionality, typename std::decay_t::const_iterator::element_ptr>( + static_cast::const_iterator const&>(*this), + static_cast::const_iterator const&>(last) + ), + multi::subarray::element, dimensionality, typename It::element_ptr>( + first_d, first_d + count + ), + dir_ + ); + return first_d + count; + } + + public: + template + auto capy(const_iterator const& last, It const& first_d) const -> decltype(auto) { + return copy_(last, first_d); + } + + template + friend auto copy(const_iterator const& first, const_iterator const& last, It const& first_d) -> decltype(auto) { + return first.copy_(last, first_d); + } + + template + friend auto copy_n(const_iterator const& first, Size const& count, It const& first_d) -> decltype(auto) { + return first.copy_(first + count, first_d); + } + + template + friend auto uninitialized_copy_n(const_iterator const& first, Size const& count, It const& first_d) -> decltype(auto) { + return copy_n(first, count, first_d); + } + + template + friend auto uninitialized_copy(const_iterator const& first, const_iterator const& last, It const& first_d) -> decltype(auto) { + return first.copy_(last, first_d); + } + }; + + public: + template + dft_range(std::array::dimensionality> which, In2&& in, Direction dir) : which_{which}, in_(std::forward(in)), dir_{dir} {} + auto begin() const { return const_iterator(in_.begin(), which_[0], std::apply([](auto /*e0*/, auto... es) { return std::array{es...}; }, which_), dir_); } + auto end () const { return const_iterator(in_.end (), which_[0], std::apply([](auto /*e0*/, auto... es) { return std::array{es...}; }, which_), dir_); } + + auto extensions() const { return in_.extensions(); } + auto size() const { return in_.size(); } + }; + + template + auto dft(std::array::dimensionality> which, In&& in, Direction dir) { + return dft_range(which, std::forward(in), dir); + } + + template + auto dft(std::array::dimensionality> which, In&& in) { + return dft(which, std::forward(in), fft::forward); + } + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-value" +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-value" +#endif + template + auto dft_all(In&& in) { + auto const all_true = std::apply([](auto... es) { return std::array{(es, true)...}; }, std::array::dimensionality>{}); + return dft(all_true, std::forward(in), fft::forward); + } + + template + auto idft_all(In&& in) { + auto const all_true = std::apply([](auto... es) { return std::array{(es, true)...}; }, std::array::dimensionality>{}); + return dft(all_true, std::forward(in), fft::backward); + } +#if defined(__clang__) +#pragma clang diagnostic pop +#elif defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + + template + auto idft(std::array which, In&& in) { + return dft(which, std::forward(in), fft::forward); + } + + template + auto dft_forward(std::array::dimensionality> which, In&& in) { + return dft(which, std::forward(in), fft::forward); + } + + template + auto dft_backward(std::array which, In&& in) { + return dft(which, std::forward(in), fft::backward); + } + } // end namespace boost::multi::fft #undef BOOST_MULTI_DECLRETURN_ +#undef BOOST_MULTI_JUSTRETURN_ #endif // BOOST_MULTI_ADAPTORS_FFT_HPP diff --git a/include/boost/multi/adaptors/fftw.hpp b/include/boost/multi/adaptors/fftw.hpp index afef671c1..a92974d12 100644 --- a/include/boost/multi/adaptors/fftw.hpp +++ b/include/boost/multi/adaptors/fftw.hpp @@ -335,17 +335,34 @@ inline auto initialize_threads() -> bool { #endif } -enum class sign : decltype(FFTW_FORWARD) { // NOLINT(performance-enum-size) - backward = FFTW_BACKWARD, - none = 0, - forward = FFTW_FORWARD, +// enum class sign : decltype(FFTW_FORWARD) { // NOLINT(performance-enum-size) +// backward = FFTW_BACKWARD, +// none = 0, +// forward = FFTW_FORWARD, +// }; + +class sign { + decltype(FFTW_FORWARD) value_; + + public: + constexpr sign(decltype(FFTW_FORWARD) value) noexcept : value_{value} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + static sign const backward; + static sign const none ; + static sign const forward ; + + constexpr operator decltype(FFTW_FORWARD)() const noexcept { return value_; } // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) }; -constexpr inline auto backward = sign::backward; -constexpr inline auto none = sign::none; -constexpr inline auto forward = sign::forward; +inline sign const sign::backward = FFTW_BACKWARD; +inline sign const sign::none = 0; +inline sign const sign::forward = FFTW_FORWARD; + +inline auto const backward = sign::backward; +inline auto const none = sign::none; +inline auto const forward = sign::forward; -static_assert(forward != none && none != backward && backward != forward); +// static_assert(forward != none && none != backward && backward != forward); enum class direction : decltype(FFTW_FORWARD) { // NOLINT(performance-enum-size) backward = FFTW_BACKWARD, diff --git a/include/boost/multi/adaptors/fftw/test/fft.cpp b/include/boost/multi/adaptors/fftw/test/fft.cpp new file mode 100644 index 000000000..a6e3c9c4e --- /dev/null +++ b/include/boost/multi/adaptors/fftw/test/fft.cpp @@ -0,0 +1,170 @@ +// Copyright 2025 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFT adaptor" + +#include + +#include + +#include + +#include + +// IWYU pragma: no_include +#include +// IWYU pragma: no_include // for forward +// IWYU pragma: no_include + +namespace multi = boost::multi; +using complex = std::complex; + +template<> +constexpr bool multi::force_element_trivial_default_construction> = true; + +namespace { +template +__attribute__((always_inline)) inline void DoNotOptimize(T const& value) { // NOLINT(readability-identifier-naming) consistency with Google benchmark + asm volatile("" : "+m"(const_cast(value))); // NOLINT(hicpp-no-assembler,cppcoreguidelines-pro-type-const-cast) hack +} +} // end namespace + +auto main() -> int { + complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) + + auto const in_cpu = multi::array{ + { 1.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, + { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, + { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, + { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, + {31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, + }; + + auto fw_cpu = multi::array(extensions(in_cpu)); + multi::fftw::dft_forward({true, true}, in_cpu, fw_cpu); + + BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); + BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); + + // check properties + { + auto const& dft = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + + BOOST_TEST( dft.extensions() == in_cpu.extensions() ); + BOOST_TEST( (*dft.begin()).size() == (*in_cpu.begin()).size() ); + BOOST_TEST( (*dft.begin()).extensions() == (*in_cpu.begin()).extensions() ); + } + // // assignment with right size + // { + // multi::array fw_cpu_out(in_cpu.extensions()); + // complex* const persistent_base = fw_cpu_out.base(); + + // fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + + // BOOST_TEST( fw_cpu_out == fw_cpu ); + // BOOST_TEST( persistent_base == fw_cpu_out.base() ); + // } + // // assignment with incorrect size (need reallocation) + // { + // multi::array fw_cpu_out({2, 2}); + + // fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + + // BOOST_TEST( fw_cpu_out == fw_cpu ); + // } + // // assignment to empty + // { + // multi::array fw_cpu_out; + + // fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + + // BOOST_TEST( fw_cpu_out == fw_cpu ); + // } + // // constructor + // { + // multi::array const fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + // BOOST_TEST( fw_cpu_out == fw_cpu ); + // } + // // check properties + // { + // auto const& dft = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + + // BOOST_TEST( dft.extensions() == in_cpu.extensions() ); + // BOOST_TEST( (*dft.begin()).size() == (*in_cpu.begin()).size() ); + // BOOST_TEST( (*dft.begin()).extensions() == (*in_cpu.begin()).extensions() ); + // } + // // assignment with right size + // { + // multi::array fw_cpu_out(in_cpu.extensions()); + // complex* const persistent_base = fw_cpu_out.base(); + + // fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + + // BOOST_TEST( fw_cpu_out == fw_cpu ); + // BOOST_TEST( persistent_base == fw_cpu_out.base() ); + // } + // // assignment with incorrect size (need reallocation) + // { + // multi::array fw_cpu_out({2, 2}); + + // fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + + // BOOST_TEST( fw_cpu_out == fw_cpu ); + // } + // // assignment to empty + // { + // multi::array fw_cpu_out; + + // fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + + // BOOST_TEST( fw_cpu_out == fw_cpu ); + // } + // // constructor + // { + // multi::array const fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + // BOOST_TEST( fw_cpu_out == fw_cpu ); + // } + // // constructor forward + // { + // multi::array const fw_cpu_out = multi::fft::dft_forward({true, true}, in_cpu); + // BOOST_TEST( fw_cpu_out == fw_cpu ); + // } + // // constructor forward default + // { + // multi::array const fw_cpu_out = multi::fft::dft({true, true}, in_cpu); + // BOOST_TEST( fw_cpu_out == fw_cpu ); + // } + // // constructor all default + // { + // multi::array const fw_cpu_out = multi::fft::dft_all(in_cpu); + // BOOST_TEST( fw_cpu_out == fw_cpu ); + // } + // // constructor none + // { + // multi::array const fw_cpu_out = multi::fft::dft({false, false}, in_cpu); + // BOOST_TEST( fw_cpu_out == in_cpu ); + // } + // // constructor none + // { + // multi::array const fw_cpu_out = multi::fft::dft({}, in_cpu); + // BOOST_TEST( fw_cpu_out == in_cpu ); + // } + // // constructor none + // { + // multi::array const fw_cpu_out = multi::fft::dft({}, in_cpu()); + // BOOST_TEST( fw_cpu_out == in_cpu ); + // } + // // constructor none + // { + // multi::array const fw_cpu_out = in_cpu.transposed(); + // BOOST_TEST( fw_cpu_out == in_cpu.transposed() ); + // } + // // constructor none + // // { + // // multi::array const fw_cpu_out = multi::fft::dft({}, in_cpu.transposed()); + // // // BOOST_TEST( fw_cpu_out == in_cpu ); + // // } + + return boost::report_errors(); +} From c5dba66ad3a0afa3edc3488a1ee908caa669a445 Mon Sep 17 00:00:00 2001 From: Alfredo Correa Date: Wed, 5 Mar 2025 21:56:58 -0800 Subject: [PATCH 2/7] functional fft --- include/boost/multi/adaptors/fftw/test/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/include/boost/multi/adaptors/fftw/test/CMakeLists.txt b/include/boost/multi/adaptors/fftw/test/CMakeLists.txt index ec47dd1cb..18d5c92bd 100644 --- a/include/boost/multi/adaptors/fftw/test/CMakeLists.txt +++ b/include/boost/multi/adaptors/fftw/test/CMakeLists.txt @@ -43,6 +43,7 @@ set(TEST_SRCS combinations.cpp # copy.cpp core.cpp + fft.cpp moved.cpp shift.cpp so_shift.cpp From 397383c652bb32a10aafd786e3cc8f4bd8006b2a Mon Sep 17 00:00:00 2001 From: Alfredo Correa Date: Thu, 6 Mar 2025 00:06:36 -0800 Subject: [PATCH 3/7] fix constructor --- .../boost/multi/adaptors/fftw/test/fft.cpp | 200 +++++++++--------- include/boost/multi/array_ref.hpp | 2 +- 2 files changed, 100 insertions(+), 102 deletions(-) diff --git a/include/boost/multi/adaptors/fftw/test/fft.cpp b/include/boost/multi/adaptors/fftw/test/fft.cpp index a6e3c9c4e..94b965336 100644 --- a/include/boost/multi/adaptors/fftw/test/fft.cpp +++ b/include/boost/multi/adaptors/fftw/test/fft.cpp @@ -4,13 +4,11 @@ #define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFT adaptor" -#include - -#include - +#include #include +#include -#include +#include // IWYU pragma: no_include #include @@ -34,11 +32,11 @@ auto main() -> int { complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) auto const in_cpu = multi::array{ - { 1.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, - { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, - { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, - { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, - {31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, + { 1.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, + { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, + { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, + { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, + {31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, }; auto fw_cpu = multi::array(extensions(in_cpu)); @@ -55,116 +53,116 @@ auto main() -> int { BOOST_TEST( (*dft.begin()).size() == (*in_cpu.begin()).size() ); BOOST_TEST( (*dft.begin()).extensions() == (*in_cpu.begin()).extensions() ); } - // // assignment with right size - // { - // multi::array fw_cpu_out(in_cpu.extensions()); - // complex* const persistent_base = fw_cpu_out.base(); + // assignment with right size + { + multi::array fw_cpu_out(in_cpu.extensions()); + complex* const persistent_base = fw_cpu_out.base(); - // fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); - // BOOST_TEST( fw_cpu_out == fw_cpu ); - // BOOST_TEST( persistent_base == fw_cpu_out.base() ); - // } - // // assignment with incorrect size (need reallocation) - // { - // multi::array fw_cpu_out({2, 2}); + BOOST_TEST( fw_cpu_out == fw_cpu ); + BOOST_TEST( persistent_base == fw_cpu_out.base() ); + } + // assignment with incorrect size (need reallocation) + { + multi::array fw_cpu_out({2, 2}); - // fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); - // BOOST_TEST( fw_cpu_out == fw_cpu ); - // } - // // assignment to empty - // { - // multi::array fw_cpu_out; + BOOST_TEST( fw_cpu_out == fw_cpu ); + } + // assignment to empty + { + multi::array fw_cpu_out; - // fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); - // BOOST_TEST( fw_cpu_out == fw_cpu ); - // } - // // constructor - // { - // multi::array const fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); - // BOOST_TEST( fw_cpu_out == fw_cpu ); - // } - // // check properties - // { - // auto const& dft = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + BOOST_TEST( fw_cpu_out == fw_cpu ); + } + // constructor + { + multi::array const fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + BOOST_TEST( fw_cpu_out == fw_cpu ); + } + // check properties + { + auto const& dft = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); - // BOOST_TEST( dft.extensions() == in_cpu.extensions() ); - // BOOST_TEST( (*dft.begin()).size() == (*in_cpu.begin()).size() ); - // BOOST_TEST( (*dft.begin()).extensions() == (*in_cpu.begin()).extensions() ); - // } - // // assignment with right size - // { - // multi::array fw_cpu_out(in_cpu.extensions()); - // complex* const persistent_base = fw_cpu_out.base(); + BOOST_TEST( dft.extensions() == in_cpu.extensions() ); + BOOST_TEST( (*dft.begin()).size() == (*in_cpu.begin()).size() ); + BOOST_TEST( (*dft.begin()).extensions() == (*in_cpu.begin()).extensions() ); + } + // assignment with right size + { + multi::array fw_cpu_out(in_cpu.extensions()); + complex* const persistent_base = fw_cpu_out.base(); - // fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); - // BOOST_TEST( fw_cpu_out == fw_cpu ); - // BOOST_TEST( persistent_base == fw_cpu_out.base() ); - // } - // // assignment with incorrect size (need reallocation) - // { - // multi::array fw_cpu_out({2, 2}); + BOOST_TEST( fw_cpu_out == fw_cpu ); + BOOST_TEST( persistent_base == fw_cpu_out.base() ); + } + // assignment with incorrect size (need reallocation) + { + multi::array fw_cpu_out({2, 2}); - // fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); - // BOOST_TEST( fw_cpu_out == fw_cpu ); - // } - // // assignment to empty - // { - // multi::array fw_cpu_out; + BOOST_TEST( fw_cpu_out == fw_cpu ); + } + // assignment to empty + { + multi::array fw_cpu_out; - // fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); - // BOOST_TEST( fw_cpu_out == fw_cpu ); - // } - // // constructor - // { - // multi::array const fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); - // BOOST_TEST( fw_cpu_out == fw_cpu ); - // } - // // constructor forward - // { - // multi::array const fw_cpu_out = multi::fft::dft_forward({true, true}, in_cpu); - // BOOST_TEST( fw_cpu_out == fw_cpu ); - // } - // // constructor forward default - // { - // multi::array const fw_cpu_out = multi::fft::dft({true, true}, in_cpu); - // BOOST_TEST( fw_cpu_out == fw_cpu ); - // } - // // constructor all default - // { - // multi::array const fw_cpu_out = multi::fft::dft_all(in_cpu); - // BOOST_TEST( fw_cpu_out == fw_cpu ); - // } - // // constructor none - // { - // multi::array const fw_cpu_out = multi::fft::dft({false, false}, in_cpu); - // BOOST_TEST( fw_cpu_out == in_cpu ); - // } - // // constructor none - // { - // multi::array const fw_cpu_out = multi::fft::dft({}, in_cpu); - // BOOST_TEST( fw_cpu_out == in_cpu ); - // } + BOOST_TEST( fw_cpu_out == fw_cpu ); + } + // constructor + { + multi::array const fw_cpu_out = multi::fft::dft({true, true}, in_cpu, multi::fft::forward); + BOOST_TEST( fw_cpu_out == fw_cpu ); + } + // constructor forward + { + multi::array const fw_cpu_out = multi::fft::dft_forward({true, true}, in_cpu); + BOOST_TEST( fw_cpu_out == fw_cpu ); + } + // constructor forward default + { + multi::array const fw_cpu_out = multi::fft::dft({true, true}, in_cpu); + BOOST_TEST( fw_cpu_out == fw_cpu ); + } + // constructor all default + { + multi::array const fw_cpu_out = multi::fft::dft_all(in_cpu); + BOOST_TEST( fw_cpu_out == fw_cpu ); + } + // constructor none + { + multi::array const fw_cpu_out = multi::fft::dft({false, false}, in_cpu); + BOOST_TEST( fw_cpu_out == in_cpu ); + } + // constructor none + { + multi::array const fw_cpu_out = multi::fft::dft({}, in_cpu); + BOOST_TEST( fw_cpu_out == in_cpu ); + } + // constructor none + { + multi::array const fw_cpu_out = multi::fft::dft({}, in_cpu()); + BOOST_TEST( fw_cpu_out == in_cpu ); + } // // constructor none // { - // multi::array const fw_cpu_out = multi::fft::dft({}, in_cpu()); - // BOOST_TEST( fw_cpu_out == in_cpu ); + // multi::array const fw_cpu_out = in_cpu.transposed(); + // BOOST_TEST( fw_cpu_out == in_cpu.transposed() ); // } // // constructor none // { - // multi::array const fw_cpu_out = in_cpu.transposed(); - // BOOST_TEST( fw_cpu_out == in_cpu.transposed() ); + // multi::array const fw_cpu_out = multi::fft::dft({}, in_cpu.transposed()); + // // BOOST_TEST( fw_cpu_out == in_cpu ); // } - // // constructor none - // // { - // // multi::array const fw_cpu_out = multi::fft::dft({}, in_cpu.transposed()); - // // // BOOST_TEST( fw_cpu_out == in_cpu ); - // // } return boost::report_errors(); } diff --git a/include/boost/multi/array_ref.hpp b/include/boost/multi/array_ref.hpp index 9f19f02df..6092e6c33 100644 --- a/include/boost/multi/array_ref.hpp +++ b/include/boost/multi/array_ref.hpp @@ -1548,7 +1548,7 @@ struct const_subarray : array_types { // using reverse_iterator [[deprecated]] = std::reverse_iterator< iterator>; // using const_reverse_iterator [[deprecated]] = std::reverse_iterator; - const_subarray(iterator first, iterator last) + const_subarray(const_iterator first, const_iterator last) : const_subarray(layout_type(first->layout(), first.stride(), 0, (last - first)*first->size()), first.base()) { assert(first->layout() == last->layout()); } From 23bda933b168d8badbe0577e6f941e3fdc58b7ba Mon Sep 17 00:00:00 2001 From: Alfredo Correa Date: Fri, 7 Mar 2025 18:56:18 -0800 Subject: [PATCH 4/7] make fft sfinae friendly --- include/boost/multi/adaptors/cufft.hpp | 28 +++++++++++++++---- .../multi/adaptors/cufft/test/CMakeLists.txt | 4 +-- include/boost/multi/adaptors/fft.hpp | 2 +- .../multi/adaptors/fftw/test/CMakeLists.txt | 2 +- .../multi/adaptors/tblis/test/CMakeLists.txt | 2 +- 5 files changed, 27 insertions(+), 11 deletions(-) diff --git a/include/boost/multi/adaptors/cufft.hpp b/include/boost/multi/adaptors/cufft.hpp index fd3628177..10bcc7a6b 100644 --- a/include/boost/multi/adaptors/cufft.hpp +++ b/include/boost/multi/adaptors/cufft.hpp @@ -84,7 +84,7 @@ constexpr sign backward{CUFFT_INVERSE}; static_assert(forward != none && none != backward && backward != forward); -template +template class plan { Alloc alloc_; ::size_t workSize_ = 0; @@ -185,6 +185,8 @@ class plan { } } + static_assert(sizeof(ILayout*) == 0); + if(first_howmany_ == D) { if constexpr(std::is_same_v) { cufftSafeCall(::cufftPlanMany( @@ -286,6 +288,7 @@ class plan { private: + template void ExecZ2Z_(complex_type const* idata, complex_type* odata, int direction) const{ cufftSafeCall(cufftExecZ2Z(h_, const_cast(idata), odata, direction)); // NOLINT(cppcoreguidelines-pro-type-const-cast) wrap legacy interface // cudaDeviceSynchronize(); @@ -293,7 +296,12 @@ class plan { public: template - void execute(IPtr idata, OPtr odata, int direction) { // TODO(correaa) make const + auto execute(IPtr idata, OPtr odata, int direction) + -> decltype((void)( + reinterpret_cast(::thrust::raw_pointer_cast(idata)), + reinterpret_cast(::thrust::raw_pointer_cast(odata)) + )) + { // TODO(correaa) make const if(first_howmany_ == DD) { ExecZ2Z_(reinterpret_cast(::thrust::raw_pointer_cast(idata)), reinterpret_cast(::thrust::raw_pointer_cast(odata)), direction); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) wrap a legacy interface return; @@ -381,24 +389,32 @@ class cached_plan { if(it_ == LEAKY_cache.end()) {it_ = LEAKY_cache.insert(std::make_pair(std::make_tuple(which, in, out), plan(which, in, out, alloc))).first;} } template - void execute(IPtr idata, OPtr odata, int direction) { + auto execute(IPtr idata, OPtr odata, int direction) + -> decltype( + (void) + (std::declval< + typename std::map, multi::layout_t, multi::layout_t>, plan >::iterator& + >()->second.execute(idata, odata, direction) + ) + ) + { // assert(it_ != LEAKY_cache.end()); it_->second.execute(idata, odata, direction); } }; -template::value, int> =0> +template::value, int> =0, typename = decltype(raw_pointer_cast(std::declval().base()))> auto dft(std::array which, In const& in, Out&& out, int sgn) ->decltype(cufft::cached_plan{which, in.layout(), out.layout()}.execute(in.base(), out.base(), sgn), std::forward(out)) { return cufft::cached_plan{which, in.layout(), out.layout()}.execute(in.base(), out.base(), sgn), std::forward(out); } -template::value, int> =0> +template::value, int> =0, typename = decltype(raw_pointer_cast(std::declval().base()))> auto dft(std::array which, In const& in, Out&& out, int sgn) ->decltype(cufft::cached_plan::rebind_alloc*/ >{which, in.layout(), out.layout()/*, i.get_allocator()*/}.execute(in.base(), out.base(), sgn), std::forward(out)) { return cufft::cached_plan::rebind_alloc*/ >{which, in.layout(), out.layout()/*, i.get_allocator()*/}.execute(in.base(), out.base(), sgn), std::forward(out); } template//, std::enable_if_t::value, int> =0> -auto dft_forward(std::array which, In const& in, Out&& out) -> Out&& { +auto dft_forward(std::array which, In const& in, Out&& out) -> Out&& { //->decltype(cufft::plan{which, i.layout(), o.layout()}.execute(i.base(), o.base(), cufft::forward), std::forward(o)) { return cufft::cached_plan{which, in.layout(), out.layout()}.execute(in.base(), out.base(), cufft::forward), std::forward(out); } diff --git a/include/boost/multi/adaptors/cufft/test/CMakeLists.txt b/include/boost/multi/adaptors/cufft/test/CMakeLists.txt index 6f01b0ee3..ce7a9ba17 100644 --- a/include/boost/multi/adaptors/cufft/test/CMakeLists.txt +++ b/include/boost/multi/adaptors/cufft/test/CMakeLists.txt @@ -71,14 +71,14 @@ include(CTest) include_directories(${CMAKE_BINARY_DIR}) # file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) -set(TEST_SRCS cufft.cpp) +#set(TEST_SRCS cufft.cpp) foreach(TEST_FILE ${TEST_SRCS}) set(TEST_EXE "${TEST_FILE}.x") add_executable(${TEST_EXE} ${TEST_FILE}) if(ENABLE_CUDA OR DEFINED CXXCUDA) set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA) - target_compile_options(${TEST_EXE} PRIVATE -std=c++17) + # target_compile_options(${TEST_EXE} PRIVATE -std=c++17) endif() # target_compile_features (${TEST_EXE} PUBLIC cxx_std_17) target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS") # needed by Boost.Test and NVCC diff --git a/include/boost/multi/adaptors/fft.hpp b/include/boost/multi/adaptors/fft.hpp index 8c513b85b..f1d2ffeea 100644 --- a/include/boost/multi/adaptors/fft.hpp +++ b/include/boost/multi/adaptors/fft.hpp @@ -167,7 +167,7 @@ namespace boost::multi::fft{ #endif template auto dft_all(In&& in) { - auto const all_true = std::apply([](auto... es) { return std::array{(es, true)...}; }, std::array::dimensionality>{}); + auto const all_true = std::apply([](auto... es) { return std::array{((void)es, true)...}; }, std::array::dimensionality>{}); return dft(all_true, std::forward(in), fft::forward); } diff --git a/include/boost/multi/adaptors/fftw/test/CMakeLists.txt b/include/boost/multi/adaptors/fftw/test/CMakeLists.txt index 18d5c92bd..a294b7137 100644 --- a/include/boost/multi/adaptors/fftw/test/CMakeLists.txt +++ b/include/boost/multi/adaptors/fftw/test/CMakeLists.txt @@ -58,7 +58,7 @@ foreach(TEST_FILE ${TEST_SRCS}) add_executable(${TEST_EXE} ${TEST_FILE}) if(ENABLE_CUDA OR DEFINED CXXCUDA) set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA) - target_compile_options(${TEST_EXE} PRIVATE -std=c++17) + # target_compile_options(${TEST_EXE} PRIVATE -std=c++17) endif() target_include_directories(${TEST_EXE} PRIVATE ${PROJECT_SOURCE_DIR}/include) diff --git a/include/boost/multi/adaptors/tblis/test/CMakeLists.txt b/include/boost/multi/adaptors/tblis/test/CMakeLists.txt index 7874678de..966a4066e 100644 --- a/include/boost/multi/adaptors/tblis/test/CMakeLists.txt +++ b/include/boost/multi/adaptors/tblis/test/CMakeLists.txt @@ -60,7 +60,7 @@ foreach(TEST_FILE ${TEST_SRCS}) add_executable(${TEST_EXE} ${TEST_FILE}) if(ENABLE_CUDA OR DEFINED CXXCUDA) set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA) - target_compile_options(${TEST_EXE} PRIVATE -std=c++17) + # target_compile_options(${TEST_EXE} PRIVATE -std=c++17) endif() # target_compile_features (${TEST_EXE} PUBLIC cxx_std_17) target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS") From b3be162f253024aacc6fc088255ae1bde2395b19 Mon Sep 17 00:00:00 2001 From: Alfredo Correa Date: Fri, 7 Mar 2025 19:59:47 -0800 Subject: [PATCH 5/7] rm static assert --- include/boost/multi/adaptors/cufft.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/boost/multi/adaptors/cufft.hpp b/include/boost/multi/adaptors/cufft.hpp index 10bcc7a6b..ca036947e 100644 --- a/include/boost/multi/adaptors/cufft.hpp +++ b/include/boost/multi/adaptors/cufft.hpp @@ -185,8 +185,6 @@ class plan { } } - static_assert(sizeof(ILayout*) == 0); - if(first_howmany_ == D) { if constexpr(std::is_same_v) { cufftSafeCall(::cufftPlanMany( From 812110be3ca06bff71e938285608c4ee76cedc16 Mon Sep 17 00:00:00 2001 From: Alfredo Correa Date: Sat, 8 Mar 2025 00:49:30 -0800 Subject: [PATCH 6/7] use complete raw_ponter_cast for sfinae --- include/boost/multi/adaptors/cufft.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/multi/adaptors/cufft.hpp b/include/boost/multi/adaptors/cufft.hpp index ca036947e..975926582 100644 --- a/include/boost/multi/adaptors/cufft.hpp +++ b/include/boost/multi/adaptors/cufft.hpp @@ -401,7 +401,7 @@ class cached_plan { } }; -template::value, int> =0, typename = decltype(raw_pointer_cast(std::declval().base()))> +template::value, int> =0, typename = decltype(::thrust::raw_pointer_cast(std::declval().base()))> auto dft(std::array which, In const& in, Out&& out, int sgn) ->decltype(cufft::cached_plan{which, in.layout(), out.layout()}.execute(in.base(), out.base(), sgn), std::forward(out)) { return cufft::cached_plan{which, in.layout(), out.layout()}.execute(in.base(), out.base(), sgn), std::forward(out); } From 1a53378276d1f2eb57de4d9a44e1b3f97a280ebc Mon Sep 17 00:00:00 2001 From: Alfredo Correa Date: Sat, 8 Mar 2025 02:00:40 -0800 Subject: [PATCH 7/7] fix hipcc --- include/boost/multi/adaptors/fft.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/boost/multi/adaptors/fft.hpp b/include/boost/multi/adaptors/fft.hpp index f1d2ffeea..c601c449d 100644 --- a/include/boost/multi/adaptors/fft.hpp +++ b/include/boost/multi/adaptors/fft.hpp @@ -25,7 +25,7 @@ namespace boost::multi::fft{ template struct priority : std::conditional_t>{}; template auto dft_aux(priority<0> /*unused*/, Args&&... args) BOOST_MULTI_DECLRETURN_( fftw::dft(std::forward(args)...)) - #if defined(__CUDA__) || defined(__NVCC__) + #if defined(__CUDA__) || defined(__NVCC__) || defined(__HIPCC__) template auto dft_aux(priority<1> /*unused*/, Args&&... args) BOOST_MULTI_DECLRETURN_(::boost::multi::cufft ::dft(std::forward(args)...)) #endif template< class... Args> auto dft(Args&&... args) BOOST_MULTI_DECLRETURN_(dft_aux_(priority<1>{}, std::forward(args)...)) @@ -33,13 +33,13 @@ namespace boost::multi::fft{ template auto dft(std::array::dimensionality> which, In&& in, Args&&... args) BOOST_MULTI_DECLRETURN_(dft_aux(priority<1>{}, which, std::forward(in), std::forward(args)...)) template auto dft_forward_aux(priority<0> /*unused*/, Args&&... args) BOOST_MULTI_DECLRETURN_( fftw::dft_forward(std::forward(args)...)) - #if defined(__CUDA__) || defined(__NVCC__) + #if defined(__CUDA__) || defined(__NVCC__) || defined(__HIPCC__) template auto dft_forward_aux(priority<1> /*unused*/, Args&&... args) BOOST_MULTI_DECLRETURN_(cufft ::dft_forward(std::forward(args)...)) #endif template auto dft_forward(std::array::dimensionality> which, In&& in, Args&&... args) BOOST_MULTI_DECLRETURN_(dft_forward_aux(priority<1>{}, which, std::forward(in), std::forward(args)...)) template auto dft_backward_aux(priority<0> /*unused*/, Args&&... args) BOOST_MULTI_DECLRETURN_( fftw::dft_backward(std::forward(args)...)) - #if defined(__CUDA__) || defined(__NVCC__) + #if defined(__CUDA__) || defined(__NVCC__) || defined(__HIPCC__) template auto dft_backward_aux(priority<1> /*unused*/, Args&&... args) BOOST_MULTI_DECLRETURN_(cufft ::dft_backward(std::forward(args)...)) #endif template auto dft_backward(std::array which, In const& in, Args&&... args) -> decltype(auto) {return dft_backward_aux(priority<1>{}, which, in, std::forward(args)...);}