Skip to content

Commit

Permalink
Merge pull request #7 from MarioSieg/develop
Browse files Browse the repository at this point in the history
Runtime CPU detection and blas specializations.
  • Loading branch information
MarioSieg authored Jan 24, 2025
2 parents eaf8b70 + 8a5adb5 commit 552ebe5
Show file tree
Hide file tree
Showing 31 changed files with 760 additions and 523 deletions.
12 changes: 10 additions & 2 deletions .github/workflows/cmake-multi-platform.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
os: [ubuntu-latest, windows-latest, macos-latest, ubuntu-24.04-arm]
build_type: [Release]
c_compiler: [gcc, clang, cl]
include:
Expand All @@ -26,6 +26,12 @@ jobs:
- os: ubuntu-latest
c_compiler: clang
cpp_compiler: clang++
- os: ubuntu-24.04-arm
c_compiler: gcc
cpp_compiler: g++
- os: ubuntu-24.04-arm
c_compiler: clang
cpp_compiler: clang++
- os: macos-latest
c_compiler: clang
cpp_compiler: clang++
Expand All @@ -36,6 +42,8 @@ jobs:
c_compiler: clang
- os: ubuntu-latest
c_compiler: cl
- os: ubuntu-24.04-arm
c_compiler: cl
- os: macos-latest
c_compiler: gcc
- os: macos-latest
Expand Down Expand Up @@ -103,4 +111,4 @@ jobs:
shell: bash
run: |
. .venv/bin/activate
python -m pytest ${{ github.workspace }}/python/tests/tests.py
python -m pytest ${{ github.workspace }}/python/tests/*
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,4 @@ out/
.tmp
CMakeSettings.json
magnetron_chat/.idea
/build/
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ endif()

include(cmake/arch.cmake)
include(cmake/lib.cmake)
include(cmake/comflags.cmake)
include(cmake/compiler_config.cmake)

if (${MAGNETRON_ENABLE_CUDA})
include(cmake/cuda.cmake)
Expand Down
27 changes: 15 additions & 12 deletions cmake/blas_tune.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,28 @@ function(set_blas_spec_arch filename posix_arch msvc_arch)
endfunction()

set(MAGNETRON_BLAS_SPEC_AMD64_SOURCES
magnetron/magnetron_cpu_blas_amd64_sse42.c
magnetron/magnetron_cpu_blas_amd64_avx.c
magnetron/magnetron_cpu_blas_amd64_avx2.c
magnetron/magnetron_cpu_blas_amd64_avx512f.c
magnetron/magnetron_cpu_blas_amd64_znver4.c
magnetron/magnetron_cpu_blas_amd64_v2.c
magnetron/magnetron_cpu_blas_amd64_v2_5.c
magnetron/magnetron_cpu_blas_amd64_v3.c
magnetron/magnetron_cpu_blas_amd64_v4.c
magnetron/magnetron_cpu_blas_amd64_v4_5.c
)

set(MAGNETRON_BLAS_SPEC_ARM64_SOURCES
magnetron/magnetron_cpu_blas_arm64_82.c
magnetron/magnetron_cpu_blas_arm64_v8_2.c
magnetron/magnetron_cpu_blas_arm64_v9.c
)

if (${IS_AMD64}) # x86-64 specific compilation options
set(MAGNETRON_SOURCES ${MAGNETRON_SOURCES} ${MAGNETRON_BLAS_SPEC_AMD64_SOURCES})
set_blas_spec_arch("magnetron_cpu_blas_amd64_sse42.c" "-mtune=nehalem -msse4.2" "/arch:SSE4.2")
set_blas_spec_arch("magnetron_cpu_blas_amd64_avx.c" "-mtune=sandybridge -mavx" "/arch:AVX")
set_blas_spec_arch("magnetron_cpu_blas_amd64_avx2.c" "-mtune=skylake -mavx -mavx2 -mfma -mf16c" "/arch:AVX2")
set_blas_spec_arch("magnetron_cpu_blas_amd64_avx512f.c" "-mtune=cannonlake -mavx -mavx2 -mfma -mf16c -mavx512f" "/arch:AVX512")
set_blas_spec_arch("magnetron_cpu_blas_amd64_znver4.c" "-mavx -mavx2 -mfma -mf16c -mavx512f -mavx512vl -mavx512vnni -mavx512bf16 -mavx512bw -mavx512dq" "/arch:AVX512")
set_blas_spec_arch("magnetron_cpu_blas_amd64_v2.c" "-mtune=nehalem -mcx16 -mpopcnt -msse3 -mssse3 -msse4.1 -msse4.2" "/arch:SSE4.2")
set_blas_spec_arch("magnetron_cpu_blas_amd64_v2_5.c" "-mtune=ivybridge -mavx -mno-avx2 -mcx16 -mpopcnt -msse3 -mssse3 -msse4.1 -msse4.2" "/arch:AVX")
set_blas_spec_arch("magnetron_cpu_blas_amd64_v3.c" "-mtune=haswell -mavx -mavx2 -mbmi -mbmi2 -mf16c -mfma -mlzcnt -mmovbe"
"/arch:AVX2 /D__BMI__=1 /D__BMI2__=1 /D__F16C__=1 /D__FMA__=1") # MSVC is just annoying
set_blas_spec_arch("magnetron_cpu_blas_amd64_v4.c" "-mtune=cannonlake -mavx512f -mavx512bw -mavx512vl -mavx512dq -mavx -mavx2 -mbmi -mbmi2 -mf16c -mfma -mlzcnt -mmovbe" "/arch:AVX512")
set_blas_spec_arch("magnetron_cpu_blas_amd64_v4_5.c" "-mtune=generic -mavx512f -mavx512bw -mavx512vl -mavx512dq -mavx512vnni -mavx512bf16 -mavx -mavx2 -mbmi -mbmi2 -mf16c -mfma -mlzcnt -mmovbe" "/arch:AVX512")
elseif(${IS_ARM64})
set(MAGNETRON_SOURCES ${MAGNETRON_SOURCES} ${MAGNETRON_BLAS_SPEC_ARM64_SOURCES})
set_blas_spec_arch("magnetron_cpu_blas_arm64_82.c" "-march=armv8.2-a+dotprod+fp16" "")
set_blas_spec_arch("magnetron_cpu_blas_arm64_v8_2.c" "-march=armv8.2-a+dotprod+fp16" "")
set_blas_spec_arch("magnetron_cpu_blas_arm64_v9.c" "-march=armv9-a+sve+sve2" "")
endif()
9 changes: 9 additions & 0 deletions cmake/comflags.cmake → cmake/compiler_config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ set(MAG_CLANG_COMPILE_FLAGS
set(MAG_CLANG_RELEASE_COMPILE_FLAGS
-O3
-flto
-fomit-frame-pointer
)
set(MAG_CLANG_LINK_OPTIONS "")
set(MAG_CLANG_RELEASE_LINK_OPTIONS -flto)
Expand All @@ -50,6 +51,14 @@ set(MAG_GCC_RELEASE_COMPILE_FLAGS
set(MAG_GCC_LINK_OPTIONS "")
set(MAG_GCC_RELEASE_LINK_OPTIONS -flto)

# Baseline instruction-set flags for the generic (non-specialized) sources.
# Each compiler's flag list is extended from its OWN list: building the GCC
# list from MAG_CLANG_COMPILE_FLAGS would drop the GCC-specific flags, pull in
# the Clang list instead, and repeat the architecture flags that were just
# appended to the Clang list.
# NOTE(review): assumes MAG_CLANG_COMPILE_FLAGS and MAG_GCC_COMPILE_FLAGS are
# both defined earlier in this file - confirm.
if (${IS_ARM64})
    # ARM64 baseline: plain ARMv8-A.
    list(APPEND MAG_CLANG_COMPILE_FLAGS -march=armv8-a)
    list(APPEND MAG_GCC_COMPILE_FLAGS -march=armv8-a)
elseif (${IS_AMD64})
    # x86-64 baseline: SSE and SSE2 only.
    list(APPEND MAG_CLANG_COMPILE_FLAGS -msse -msse2)
    list(APPEND MAG_GCC_COMPILE_FLAGS -msse -msse2)
endif()

if (WIN32) # Windows (MSVC) specific config
target_compile_options(magnetron PRIVATE ${MAG_MSVC_COMPILE_FLAGS})
target_link_options(magnetron PRIVATE ${MAG_MSVC_LINK_OPTIONS})
Expand Down
Loading

0 comments on commit 552ebe5

Please sign in to comment.