Skip to content

Commit

Permalink
runtime dispatching with MIPP library, papi for performance measurement
Browse files Browse the repository at this point in the history
* pf_conv_* libraries for naive convolution
with architecture specific optimizations - utilizing MIPP library for SIMD
and benchmark utilizing papi for measurement
* MIPP with [un]install patch from https://github.com/hayguen/MIPP
  should be installed
* bench_mixers also uses papi (if available)
* fixed address sanitizer (asan)
* removed cmake options DISABLE_SIMD_AVX, USE_SIMD_NEON
* target_optimizations.cmake:
 - added optional additional(extra) options to target_set_c[xx]_arch_flags()
 - added gcc_clang_fpu options to target_set_cxx_arch_option() macro
* with 'neon' variants for dispatching on cmake processor 'armv7l'
  papi doesn't work on this 32 bit Raspbian on the Raspberry Pi 4B hardware,
  but linux 'time' shows a total speedup of nearly factor 2
  comparing '-a 0' (none) against '-a <n>' (neon)

Signed-off-by: hayati ayguen <[email protected]>
  • Loading branch information
hayguen committed Feb 5, 2022
1 parent 08bd199 commit dc74fd4
Show file tree
Hide file tree
Showing 16 changed files with 1,734 additions and 228 deletions.
221 changes: 179 additions & 42 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ option(USE_TYPE_DOUBLE "activate 'double' precision float?" ON)

# architecture/optimization options
option(USE_SIMD "use SIMD (SSE/AVX/NEON/ALTIVEC) CPU features? - " ON)
option(DISABLE_SIMD_AVX "disable AVX CPU features? - " OFF)
option(USE_SIMD_NEON "force using NEON on ARM? (requires USE_SIMD)" OFF)
option(USE_SCALAR_VECT "use 4-element vector scalar operations (if no other SIMD)" ON)

# what to install?
Expand Down Expand Up @@ -46,6 +44,18 @@ if ( (NOT USE_TYPE_FLOAT) AND (NOT USE_TYPE_DOUBLE) )
message(FATAL_ERROR "activate at least one of USE_TYPE_FLOAT or USE_TYPE_DOUBLE")
endif()

# Make the project's own cmake/ directory searchable for modules, then load
# the two helper scripts kept there.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
include(cmake/target_optimizations.cmake)
include(cmake/compiler_warnings.cmake)

# PAPI and MIPP are both optional: neither lookup is REQUIRED, and later code
# tests PAPI_FOUND / MIPP_FOUND before using them.
find_package(PAPI)
find_package(MIPP)

# Report the outcome of the MIPP lookup in the configure log.
if (NOT MIPP_FOUND)
  message(STATUS "NOT found MIPP")
else()
  message(STATUS "found MIPP")
endif()


if (USE_DEBUG_ASAN)
set(ASANLIB "asan")
Expand Down Expand Up @@ -147,40 +157,18 @@ endif()
add_library(PFFFT STATIC ${FLOAT_SOURCES} ${DOUBLE_SOURCES} pffft_common.c pffft_priv_impl.h pffft.hpp )
set_target_properties(PFFFT PROPERTIES OUTPUT_NAME "pffft")
target_compile_definitions(PFFFT PRIVATE _USE_MATH_DEFINES)
target_activate_c_compiler_warnings(PFFFT)
if (USE_SCALAR_VECT)
target_compile_definitions(PFFFT PRIVATE PFFFT_SCALVEC_ENABLED=1)
endif()
if (USE_DEBUG_ASAN)
target_compile_options(PFFFT PRIVATE "-fsanitize=address")
endif()
target_set_c_arch_flags(PFFFT)
if (NOT USE_SIMD)
target_compile_definitions(PFFFT PRIVATE PFFFT_SIMD_DISABLE=1)
endif()
if (USE_SIMD AND USE_SIMD_NEON)
target_compile_definitions(PFFFT PRIVATE PFFFT_ENABLE_NEON=1)
target_compile_options(PFFFT PRIVATE "-mfpu=neon")
endif()
if (USE_SIMD AND USE_TYPE_DOUBLE)
if(MSVC)
if(DISABLE_SIMD_AVX)
set_property(SOURCE pffft_double.c PROPERTY COMPILE_FLAGS "/arch:SSE2")
else()
set_property(SOURCE pffft_double.c PROPERTY COMPILE_FLAGS "/arch:AVX")
endif()
elseif(CMAKE_COMPILER_IS_GNUCC)
if(DISABLE_SIMD_AVX)
set_property(SOURCE pffft_double.c PROPERTY COMPILE_FLAGS "-msse2")
else()
set_property(SOURCE pffft_double.c PROPERTY COMPILE_FLAGS "-mavx")
endif()
else()
set_property(SOURCE pffft_double.c PROPERTY COMPILE_FLAGS "-march=native")
endif()
if(DISABLE_SIMD_AVX)
target_compile_definitions(PFFFT PRIVATE PFFFT_AVX_DISABLE=1)
endif()
endif()
target_link_libraries( PFFFT ${MATHLIB} )
target_link_libraries( PFFFT ${ASANLIB} ${MATHLIB} )
set_property(TARGET PFFFT APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
)
Expand All @@ -193,23 +181,25 @@ endif()

if (USE_TYPE_FLOAT)

add_library(PFDSP STATIC pf_mixer.cpp pf_mixer.h pf_carrier.cpp pf_carrier.h pf_cic.cpp pf_cic.h fmv.h )
add_library(PFDSP STATIC pf_mixer.cpp pf_mixer.h pf_cplx.h pf_carrier.cpp pf_carrier.h pf_cic.cpp pf_cic.h fmv.h )
set_property(TARGET PFDSP PROPERTY CXX_STANDARD 11)
set_property(TARGET PFDSP PROPERTY CXX_STANDARD_REQUIRED ON)
set_target_properties(PFDSP PROPERTIES OUTPUT_NAME "pfdsp")
target_compile_definitions(PFDSP PRIVATE _USE_MATH_DEFINES)
target_activate_cxx_compiler_warnings(PFDSP)
if (USE_DEBUG_ASAN)
target_compile_options(PFDSP PRIVATE "-fsanitize=address")
endif()
if (USE_SIMD AND USE_SIMD_NEON)
target_compile_definitions(PFDSP PRIVATE PFFFT_ENABLE_NEON=1)
target_compile_options(PFDSP PRIVATE "-march=armv7-a" "-mfpu=neon")
if (USE_SIMD)
target_set_cxx_arch_flags(PFDSP)
endif()
target_link_libraries( PFDSP ${MATHLIB} )
set_property(TARGET PFDSP APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
)
if (INSTALL_PFDSP)
set(INSTALL_TARGETS ${INSTALL_TARGETS} PFDSP)
set(INSTALL_HEADERS ${INSTALL_HEADERS} pf_mixer.h pf_carrier.h pf_cic.h)
set(INSTALL_HEADERS ${INSTALL_HEADERS} pf_mixer.h pf_cplx.h pf_carrier.h pf_cic.h)
endif()
endif()

Expand All @@ -220,7 +210,7 @@ if (USE_FFTPACK)
# float / single precision
add_library(FFTPACK_FLOAT STATIC fftpack.c fftpack.h)
target_compile_definitions(FFTPACK_FLOAT PRIVATE _USE_MATH_DEFINES)
target_compile_options(FFTPACK_FLOAT PRIVATE $<$<C_COMPILER_ID:GNU>:-Wall -Wextra -pedantic>)
target_activate_c_compiler_warnings(FFTPACK_FLOAT)
target_link_libraries( FFTPACK_FLOAT ${MATHLIB} )
set_property(TARGET FFTPACK_FLOAT APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
Expand All @@ -230,7 +220,7 @@ if (USE_FFTPACK)
add_library(FFTPACK_DOUBLE STATIC fftpack.c fftpack.h)
target_compile_definitions(FFTPACK_DOUBLE PRIVATE _USE_MATH_DEFINES)
target_compile_definitions(FFTPACK_DOUBLE PUBLIC FFTPACK_DOUBLE_PRECISION)
target_compile_options(FFTPACK_DOUBLE PRIVATE $<$<C_COMPILER_ID:GNU>:-Wall -Wextra -pedantic>)
target_activate_c_compiler_warnings(FFTPACK_DOUBLE)
target_link_libraries( FFTPACK_DOUBLE ${MATHLIB} )
set_property(TARGET FFTPACK_DOUBLE APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
Expand All @@ -254,6 +244,7 @@ if (USE_TYPE_FLOAT)
# Static fast-convolution library; the produced archive is named "pffastconv".
add_library(PFFASTCONV STATIC
  pffastconv.c
  pffastconv.h
  pffft.h
)
set_property(TARGET PFFASTCONV PROPERTY OUTPUT_NAME "pffastconv")
target_compile_definitions(PFFASTCONV PRIVATE _USE_MATH_DEFINES)
target_activate_c_compiler_warnings(PFFASTCONV)

# Compile with AddressSanitizer instrumentation only when USE_DEBUG_ASAN is on.
if (USE_DEBUG_ASAN)
  target_compile_options(PFFASTCONV PRIVATE "-fsanitize=address")
endif()
Expand Down Expand Up @@ -343,14 +334,12 @@ if (USE_TYPE_FLOAT)
if (USE_DEBUG_ASAN)
target_compile_options(test_pffastconv PRIVATE "-fsanitize=address")
endif()
target_set_c_arch_flags(test_pffastconv)
if (NOT USE_SIMD)
target_compile_definitions(test_pffastconv PRIVATE PFFFT_SIMD_DISABLE=1)
endif()
if (USE_SIMD AND USE_SIMD_NEON)
target_compile_definitions(test_pffastconv PRIVATE PFFFT_ENABLE_NEON=1)
target_compile_options(test_pffastconv PRIVATE "-mfpu=neon")
endif()
target_link_libraries( test_pffastconv PFFASTCONV ${ASANLIB} ${MATHLIB} )

endif()

######################################################
Expand All @@ -359,6 +348,9 @@ if (USE_TYPE_FLOAT)
# PFFFT benchmark executable built with PFFFT_ENABLE_FLOAT.
add_executable(bench_pffft_float bench_pffft.c pffft.h)
target_compile_definitions(bench_pffft_float PRIVATE
  _USE_MATH_DEFINES
  PFFFT_ENABLE_FLOAT
)

# Compile with AddressSanitizer instrumentation only when USE_DEBUG_ASAN is on.
if (USE_DEBUG_ASAN)
  target_compile_options(bench_pffft_float PRIVATE "-fsanitize=address")
endif()

target_link_libraries(bench_pffft_float PFFFT ${ASANLIB})

Expand Down Expand Up @@ -391,7 +383,7 @@ if (USE_TYPE_FLOAT)
if ( (CMAKE_SYSTEM_PROCESSOR STREQUAL "i686") OR (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") )
# has chances to work
else()
# other PROCESSORs could be "ppc", "ppc64", "arm", "aarch64" - or something else?!
# other PROCESSORs could be "ppc", "ppc64", "arm", "aarch64", "armv7l" - or something else?!
message(WARNING "using Intel MKL on '${CMAKE_SYSTEM_PROCESSOR}' might fail.")
endif()
message(STATUS "In case compiling/linking with Intel MKL fails, check CMakeLists.txt or deactivate USE_BENCH_MKL")
Expand All @@ -404,6 +396,9 @@ if (USE_TYPE_DOUBLE)
# PFFFT benchmark executable built with PFFFT_ENABLE_DOUBLE.
add_executable(bench_pffft_double bench_pffft.c pffft.h)
target_compile_definitions(bench_pffft_double PRIVATE
  _USE_MATH_DEFINES
  PFFFT_ENABLE_DOUBLE
)

# Compile with AddressSanitizer instrumentation only when USE_DEBUG_ASAN is on.
if (USE_DEBUG_ASAN)
  target_compile_options(bench_pffft_double PRIVATE "-fsanitize=address")
endif()

target_link_libraries(bench_pffft_double PFFFT ${ASANLIB})

if (USE_FFTPACK)
Expand All @@ -425,7 +420,7 @@ if (USE_TYPE_DOUBLE)
if ( (CMAKE_SYSTEM_PROCESSOR STREQUAL "i686") OR (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") )
# has chances to work
else()
# other PROCESSORs could be "ppc", "ppc64", "arm", "aarch64" - or something else?!
# other PROCESSORs could be "ppc", "ppc64", "arm", "aarch64", "armv7l" - or something else?!
message(WARNING "using Intel MKL on '${CMAKE_SYSTEM_PROCESSOR}' might fail.")
endif()
message(STATUS "In case compiling/linking with Intel MKL fails, check CMakeLists.txt or deactivate USE_BENCH_MKL")
Expand All @@ -437,11 +432,153 @@ endif()
######################################################

if (USE_TYPE_FLOAT)
add_executable(bench_pf_mixer_float bench_mixers.c)

# Mixer benchmark (C++ source) together with the PAPI performance-counter header.
add_executable(bench_pf_mixer_float
  bench_mixers.cpp
  papi_perf_counter.h
)
target_compile_definitions(bench_pf_mixer_float PRIVATE
  _USE_MATH_DEFINES
  PFFFT_ENABLE_FLOAT
)

# Compile with AddressSanitizer instrumentation only when USE_DEBUG_ASAN is on.
if (USE_DEBUG_ASAN)
  target_compile_options(bench_pf_mixer_float PRIVATE "-fsanitize=address")
endif()

# Link order is kept as: ASAN runtime, then PAPI (if found), then PFDSP.
target_link_libraries(bench_pf_mixer_float ${ASANLIB})
if (PAPI_FOUND)
  # HAVE_PAPI=1 is defined only when find_package(PAPI) succeeded.
  target_compile_definitions(bench_pf_mixer_float PRIVATE HAVE_PAPI=1)
  target_link_libraries(bench_pf_mixer_float ${PAPI_LIBRARIES})
endif()
target_link_libraries(bench_pf_mixer_float PFDSP)


############################################################################

# Variant "none": pf_conv.cpp compiled with MIPP_NO_INTRINSICS=1 and with no
# architecture flags applied in this section.
add_library(pf_conv_arch_none pf_conv.cpp pf_conv.h pf_cplx.h)
target_compile_definitions(pf_conv_arch_none PRIVATE
  CONV_ARCH_POST=none
  MIPP_NO_INTRINSICS=1
)
set_target_properties(pf_conv_arch_none PROPERTIES
  CXX_STANDARD 11
  CXX_STANDARD_REQUIRED ON
)
target_activate_cxx_compiler_warnings(pf_conv_arch_none)

# Dispatcher library; every pf_conv_arch_* variant gets linked into it.
add_library(pf_conv_dispatcher
  pf_conv_dispatcher.cpp
  pf_conv_dispatcher.h
  pf_conv.h
  pf_cplx.h
)
set_target_properties(pf_conv_dispatcher PROPERTIES
  CXX_STANDARD 11
  CXX_STANDARD_REQUIRED ON
)
target_activate_cxx_compiler_warnings(pf_conv_dispatcher)

# Variant "dflt": same sources, with the architecture flags applied by
# target_set_cxx_arch_flags().
add_library(pf_conv_arch_dflt pf_conv.cpp pf_conv.h pf_cplx.h)
target_compile_definitions(pf_conv_arch_dflt PRIVATE CONV_ARCH_POST=dflt)
set_target_properties(pf_conv_arch_dflt PROPERTIES
  CXX_STANDARD 11
  CXX_STANDARD_REQUIRED ON
)
target_activate_cxx_compiler_warnings(pf_conv_arch_dflt)
target_set_cxx_arch_flags(pf_conv_arch_dflt)

target_link_libraries(pf_conv_dispatcher
  pf_conv_arch_none
  pf_conv_arch_dflt
)

# Select the list of architecture-specific pf_conv variants (PF_CONV_ARCHES)
# for the current processor/compiler combination.
#
# For each entry <name> in PF_CONV_ARCHES:
#   PF_CONV_OPT_<name>    - architecture option for that variant
#   PF_CONV_EXTRA_<name>  - optional additional option (only set for armv7l)
# These "emulated maps" are read by the foreach loop that creates the
# pf_conv_arch_<name> libraries. A CONV_ARCH_* definition is also added to
# pf_conv_dispatcher for the selected combination.
#
# Every fallback branch sets PF_CONV_ARCHES to an empty list, so that loop
# never runs with a stale or inherited value.
if ((CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64"))

  if ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    set(PF_CONV_ARCHES "sse3;sse4;avx;avx2")
    set(PF_CONV_OPT_sse3 "core2") # emulate a map
    set(PF_CONV_OPT_sse4 "nehalem")
    set(PF_CONV_OPT_avx "sandybridge")
    set(PF_CONV_OPT_avx2 "haswell")
    target_compile_definitions(pf_conv_dispatcher PRIVATE CONV_ARCH_GCC_AMD64)

  elseif (CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
    set(PF_CONV_ARCHES "sse2;avx;avx2")
    set(PF_CONV_OPT_sse2 "SSE2") # emulate a map
    set(PF_CONV_OPT_avx "AVX")
    set(PF_CONV_OPT_avx2 "AVX2")
    target_compile_definitions(pf_conv_dispatcher PRIVATE CONV_ARCH_MSVC_AMD64)

  else()
    set(PF_CONV_ARCHES "")
    message(WARNING "unknown compiler ${CMAKE_CXX_COMPILER_ID} on CMAKE_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR}: can't do architecture specific compilation")
  endif()

elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")

  if ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    set(PF_CONV_ARCHES "armv8a")
    set(PF_CONV_OPT_armv8a "armv8-a") # emulate a map for arch

    target_compile_definitions(pf_conv_dispatcher PRIVATE CONV_ARCH_GCC_AARCH64)
  else()
    set(PF_CONV_ARCHES "")
    message(WARNING "unknown compiler ${CMAKE_CXX_COMPILER_ID} on CMAKE_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR}: can't do architecture specific compilation")
  endif()

elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "armv7l")

  if ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    set(PF_CONV_ARCHES "neon_vfpv4;neon_rpi3_a53;neon_rpi4_a72")
    set(PF_CONV_OPT_neon_vfpv4 "armv7-a") # emulate a map for arch
    set(PF_CONV_EXTRA_neon_vfpv4 "neon_vfpv4") # emulate a map for additional options (EXTRA)
    set(PF_CONV_OPT_neon_rpi3_a53 "armv7-a")
    set(PF_CONV_EXTRA_neon_rpi3_a53 "neon_rpi3_a53")
    set(PF_CONV_OPT_neon_rpi4_a72 "armv7-a")
    set(PF_CONV_EXTRA_neon_rpi4_a72 "neon_rpi4_a72")

    target_compile_definitions(pf_conv_dispatcher PRIVATE CONV_ARCH_GCC_ARM32NEON)
  else()
    set(PF_CONV_ARCHES "")
    message(WARNING "unknown compiler ${CMAKE_CXX_COMPILER_ID} on CMAKE_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR}: can't do architecture specific compilation")
  endif()

else()
  # Unknown processor: explicitly build no architecture-specific variants,
  # matching the unknown-compiler fallbacks above.
  set(PF_CONV_ARCHES "")
  message(WARNING "this is unforseen CMAKE_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR}: can't do architecture specific compilation")
endif()

# Build one pf_conv library per selected architecture variant and link it
# into the dispatcher. CONV_ARCH_POST is set to the variant name.
foreach (arch_opt ${PF_CONV_ARCHES})
  add_library(pf_conv_arch_${arch_opt}
    pf_conv.cpp
    pf_conv.h
    pf_cplx.h
  )
  target_compile_definitions(pf_conv_arch_${arch_opt} PRIVATE CONV_ARCH_POST=${arch_opt})
  set_target_properties(pf_conv_arch_${arch_opt} PROPERTIES
    CXX_STANDARD 11
    CXX_STANDARD_REQUIRED ON
  )
  target_activate_cxx_compiler_warnings(pf_conv_arch_${arch_opt})

  # Arguments come from the PF_CONV_OPT_<name> / PF_CONV_EXTRA_<name> variables;
  # PF_CONV_EXTRA_<name> expands to an empty string when it was never set.
  target_set_cxx_arch_option(pf_conv_arch_${arch_opt} "${PF_CONV_OPT_${arch_opt}}" "${PF_CONV_EXTRA_${arch_opt}}" "${PF_CONV_OPT_${arch_opt}}")

  target_link_libraries(pf_conv_dispatcher pf_conv_arch_${arch_opt})
  message(STATUS "added library pf_conv_arch_${arch_opt} with CONV_ARCH_POST=${arch_opt}")
endforeach()

# AddressSanitizer instrumentation for the pf_conv libraries.
# Covers every pf_conv_arch_* variant: the architecture-specific ones from
# PF_CONV_ARCHES plus "none" and "dflt". Previously "dflt" was left
# uninstrumented, so memory errors in that variant went undetected.
# The dispatcher is instrumented as well.
if (USE_DEBUG_ASAN)
  foreach (arch_opt ${PF_CONV_ARCHES} none dflt)
    target_compile_options(pf_conv_arch_${arch_opt} PRIVATE "-fsanitize=address")
    target_link_libraries(pf_conv_arch_${arch_opt} ${ASANLIB})
  endforeach()

  target_compile_options(pf_conv_dispatcher PRIVATE "-fsanitize=address")
  target_link_libraries(pf_conv_dispatcher ${ASANLIB})
endif()

# When MIPP was found, link the architecture-specific variants and then the
# "none" variant against the MIPP target, logging each link.
# NOTE(review): pf_conv_arch_dflt is not linked against MIPP here - confirm
# whether that is intentional.
if (MIPP_FOUND)
  foreach (arch_opt ${PF_CONV_ARCHES} none)
    message(STATUS "link pf_conv_arch_${arch_opt} against MIPP")
    target_link_libraries(pf_conv_arch_${arch_opt} MIPP)
  endforeach()
endif()

############################################################################


# Convolution benchmark together with the PAPI performance-counter header.
add_executable(bench_pf_conv_float
  bench_conv.cpp
  papi_perf_counter.h
)
set_target_properties(bench_pf_conv_float PROPERTIES
  CXX_STANDARD 11
  CXX_STANDARD_REQUIRED ON
)
target_compile_definitions(bench_pf_conv_float PRIVATE
  _USE_MATH_DEFINES
  PFFFT_ENABLE_FLOAT
)

# Compile with AddressSanitizer instrumentation only when USE_DEBUG_ASAN is on.
if (USE_DEBUG_ASAN)
  target_compile_options(bench_pf_conv_float PRIVATE "-fsanitize=address")
endif()

# Link order is kept as: ASAN runtime, PAPI (if found), MIPP (if found).
target_link_libraries(bench_pf_conv_float ${ASANLIB})
if (PAPI_FOUND)
  # HAVE_PAPI=1 is defined only when find_package(PAPI) succeeded.
  target_compile_definitions(bench_pf_conv_float PRIVATE HAVE_PAPI=1)
  target_link_libraries(bench_pf_conv_float ${PAPI_LIBRARIES})
endif()
if (MIPP_FOUND)
  target_link_libraries(bench_pf_conv_float MIPP)
endif()

target_link_libraries( bench_pf_mixer_float PFDSP ${ASANLIB} )
# Link the convolution benchmark against the pf_conv dispatcher library and PFDSP.
target_link_libraries( bench_pf_conv_float pf_conv_dispatcher PFDSP )

endif()

Expand Down
Loading

0 comments on commit dc74fd4

Please sign in to comment.