Skip to content

Commit

Permalink
conditional avx512bf16
Browse files Browse the repository at this point in the history
  • Loading branch information
MarioSieg committed Feb 18, 2025
1 parent da03cad commit 2658389
Showing 1 changed file with 27 additions and 9 deletions.
36 changes: 27 additions & 9 deletions cmake/blas_tune.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,34 @@ set(MAGNETRON_BLAS_SPEC_ARM64_SOURCES
magnetron/magnetron_cpu_blas_arm64_v9.c
)

# Per-architecture setup for the specialized BLAS translation units.
# For the detected target architecture this appends the matching source list
# to MAGNETRON_SOURCES and registers per-file compiler flags through
# set_blas_spec_arch() (defined elsewhere).
# NOTE(review): judging by the argument values, the second argument carries
# GCC/Clang-style flags and the third carries MSVC-style flags -- confirm
# against the helper's definition.
if(${IS_AMD64}) # x86-64 specific compilation options
  include(CheckCCompilerFlag)

  # Check for support of -mavx512bf16, some older GCC versions don't support it.
  # The flag is spliced into AVX512_FLAGS only when the probe succeeds, so the
  # rest of the flag list is written exactly once and cannot drift between the
  # "supported" and "unsupported" variants.
  check_c_compiler_flag("-mavx512bf16" COMPILER_SUPPORTS_MAVX512BF16)
  if(COMPILER_SUPPORTS_MAVX512BF16)
    set(AVX512_BF16_FLAG " -mavx512bf16")
  else()
    message(WARNING "Compiler does NOT support -mavx512bf16; removing it from the flags for magnetron_cpu_blas_amd64_v4_5.c")
    set(AVX512_BF16_FLAG "")
  endif()
  set(AVX512_FLAGS "-mtune=generic -mavx512f -mavx512bw -mavx512vl -mavx512dq -mavx512vnni${AVX512_BF16_FLAG} -mavx -mavx2 -mbmi -mbmi2 -mf16c -mfma -mlzcnt -mmovbe")

  set(MAGNETRON_SOURCES ${MAGNETRON_SOURCES} ${MAGNETRON_BLAS_SPEC_AMD64_SOURCES})

  # Each specialization is registered exactly once, from the baseline SSE4.2
  # level up to AVX-512 with VNNI (and BF16 when available).
  set_blas_spec_arch("magnetron_cpu_blas_amd64_v2.c"
    "-mtune=nehalem -mcx16 -mpopcnt -msse3 -mssse3 -msse4.1 -msse4.2"
    "/arch:SSE4.2")
  set_blas_spec_arch("magnetron_cpu_blas_amd64_v2_5.c"
    "-mtune=ivybridge -mavx -mno-avx2 -mcx16 -mpopcnt -msse3 -mssse3 -msse4.1 -msse4.2"
    "/arch:AVX")
  set_blas_spec_arch("magnetron_cpu_blas_amd64_v3.c"
    "-mtune=haswell -mavx -mavx2 -mbmi -mbmi2 -mf16c -mfma -mlzcnt -mmovbe"
    "/arch:AVX2 /D__BMI__=1 /D__BMI2__=1 /D__F16C__=1 /D__FMA__=1") # MSVC is just annoying
  set_blas_spec_arch("magnetron_cpu_blas_amd64_v4.c"
    "-mtune=cannonlake -mavx512f -mavx512bw -mavx512vl -mavx512dq -mavx -mavx2 -mbmi -mbmi2 -mf16c -mfma -mlzcnt -mmovbe"
    "/arch:AVX512")
  set_blas_spec_arch("magnetron_cpu_blas_amd64_v4_5.c"
    "${AVX512_FLAGS}"
    "/arch:AVX512")
elseif(${IS_ARM64}) # AArch64 specific compilation options
  set(MAGNETRON_SOURCES ${MAGNETRON_SOURCES} ${MAGNETRON_BLAS_SPEC_ARM64_SOURCES})
  # No MSVC-style flags are supplied for the ARM64 specializations.
  set_blas_spec_arch("magnetron_cpu_blas_arm64_v8_2.c" "-march=armv8.2-a+dotprod+fp16" "")
  set_blas_spec_arch("magnetron_cpu_blas_arm64_v9.c" "-march=armv9-a+sve+sve2" "")
endif()

0 comments on commit 2658389

Please sign in to comment.