ggml-org · sandboxyer · Mar 4, 2025 · aviallon · Mar 12, 2025
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,5 +1,18 @@
 cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
 project("llama.cpp" C CXX)
+
+# Force AVX2 + FMA code generation for every target configured from this
+# directory down when building for x86-64. Binaries built this way fault with
+# an illegal instruction on CPUs lacking AVX2/FMA, so the behaviour can be
+# disabled with -DLLAMA_FORCE_AVX2_FMA=OFF. The flags use the GCC/Clang
+# spelling and are therefore skipped for MSVC-style compilers.
+option(LLAMA_FORCE_AVX2_FMA "Add -mfma -mavx2 to all targets on x86-64" ON)
+if(LLAMA_FORCE_AVX2_FMA
+   AND NOT MSVC
+   AND "${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(x86_64|AMD64|amd64)$")
+    add_compile_options(-mfma -mavx2)
+endif()
+
 include(CheckIncludeFileCXX)
 
 #set(CMAKE_WARN_DEPRECATED YES)

diff --git a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
@@ -114,7 +114,7 @@ static inline __m512 __avx512_repeat_f32cx16_load(__m128i x) {
     return _mm512_loadu_ps(tmp);
 }
 #endif
-static inline __m256 __avx_f32cx8_load(ggml_fp16_t *x) {
+static inline __m256 __avx_f32cx8_load(const ggml_fp16_t *x) {
     float tmp[8];
 
     for (int i = 0; i < 8; i++) {
@@ -123,7 +123,7 @@ static inline __m256 __avx_f32cx8_load(ggml_fp16_t *x) {
 
     return _mm256_loadu_ps(tmp);
 }
-static inline __m256 __avx_repeat_f32cx8_load(ggml_fp16_t *x) {
+static inline __m256 __avx_repeat_f32cx8_load(const ggml_fp16_t *x) {
     float tmp[8];
 
     for (int i = 0; i < 4; i++) {
@@ -133,7 +133,7 @@ static inline __m256 __avx_repeat_f32cx8_load(ggml_fp16_t *x) {
 
     return _mm256_loadu_ps(tmp);
 }
-static inline __m256 __avx_rearranged_f32cx8_load(ggml_fp16_t *x, __m128i arrangeMask) {
+static inline __m256 __avx_rearranged_f32cx8_load(const ggml_fp16_t *x, __m128i arrangeMask) {
     uint16_t tmphalf[8];
     float tmp[8];
 

diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
@@ -2,6 +2,11 @@
 #define _USE_MATH_DEFINES // For M_PI on MSVC
 
 #include "ggml-backend.h"
+// <immintrin.h> is only shipped with x86 toolchains; including it
+// unconditionally breaks compilation for every other architecture.
+#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
+#include <immintrin.h>
+#endif
 #include "ggml-impl.h"
 #include "ggml-threading.h"
 #include "ggml.h"