Skip to content

Commit a902fb4

Browse files
jdomkedomke
authored andcommitted
ggml : reading the runtime sve config of the cpu (llama/8709)
* ggml : reading the runtime sve config of the cpu * change to one time init to prevent performance drop * prefix variable to avoid possible conflicts * revert xxhash fix and add brackets --------- Co-authored-by: domke <[email protected]>
1 parent 6cb38c3 commit a902fb4

File tree

5 files changed

+30
-16
lines changed

5 files changed

+30
-16
lines changed

ggml/src/ggml-aarch64.c

+14-14
Original file line numberDiff line numberDiff line change
@@ -386,8 +386,8 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
386386
UNUSED(blocklen);
387387

388388
#if defined(__ARM_FEATURE_SVE)
389-
if (svcntw() == 8) {
390-
GGML_ASSERT(!(ggml_cpu_has_sve() && (svcntw() == 8)) &&
389+
if (ggml_sve_cnt_b == QK8_0) {
390+
GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
391391
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
392392
}
393393
#endif
@@ -498,8 +498,8 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
498498
UNUSED(blocklen);
499499

500500
#if defined(__ARM_FEATURE_SVE)
501-
if (svcntw() == 8) {
502-
GGML_ASSERT(!(ggml_cpu_has_sve() && (svcntw() == 8)) &&
501+
if (ggml_sve_cnt_b == QK8_0) {
502+
GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
503503
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
504504
}
505505
#endif
@@ -616,7 +616,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
616616
UNUSED(blocklen);
617617

618618
#if defined(__ARM_FEATURE_SVE) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
619-
if (svcntw() == 8) {
619+
if (ggml_sve_cnt_b == QK8_0) {
620620
const void * b_ptr = vx;
621621
const void * a_ptr = vy;
622622
float * res_ptr = s;
@@ -682,12 +682,12 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
682682
return;
683683
}
684684
else if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
685-
GGML_ASSERT((ggml_cpu_has_sve() && (svcntw() == 8)) &&
685+
GGML_ASSERT((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
686686
"__ARM_FEATURE_SVE for vector size of 256-bits not defined, use the Q4_0_4_8 quantization format for optimal "
687687
"performance");
688688
}
689689
else if (ggml_cpu_has_neon()) {
690-
GGML_ASSERT(((ggml_cpu_has_sve() && (svcntw() == 8)) || ggml_cpu_has_matmul_int8()) &&
690+
GGML_ASSERT(((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) || ggml_cpu_has_matmul_int8()) &&
691691
"__ARM_FEATURE_SVE for vector size of 256-bits and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 "
692692
"quantization format for optimal performance");
693693
}
@@ -747,8 +747,8 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
747747
UNUSED(blocklen);
748748

749749
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
750-
if (svcntw() == 8) {
751-
GGML_ASSERT(!(ggml_cpu_has_sve() && (svcntw() == 8)) &&
750+
if (ggml_sve_cnt_b == QK8_0) {
751+
GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
752752
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
753753
}
754754
#endif
@@ -1268,8 +1268,8 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
12681268
UNUSED(blocklen);
12691269

12701270
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
1271-
if (svcntw() == 8) {
1272-
GGML_ASSERT(!(ggml_cpu_has_sve() && (svcntw() == 8)) &&
1271+
if (ggml_sve_cnt_b == QK8_0) {
1272+
GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
12731273
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
12741274
}
12751275
#endif
@@ -1730,7 +1730,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
17301730
UNUSED(blocklen);
17311731

17321732
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
1733-
if (svcntw() == 8) {
1733+
if (ggml_sve_cnt_b == QK8_0) {
17341734
const void * b_ptr = vx;
17351735
const void * a_ptr = vy;
17361736
float * res_ptr = s;
@@ -2141,12 +2141,12 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
21412141
return;
21422142
}
21432143
else if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
2144-
GGML_ASSERT((ggml_cpu_has_sve() && (svcntw() == 8)) &&
2144+
GGML_ASSERT((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
21452145
"__ARM_FEATURE_SVE for vector size of 256-bits not defined, use the Q4_0_4_8 quantization format for optimal "
21462146
"performance");
21472147
}
21482148
else if (ggml_cpu_has_neon()) {
2149-
GGML_ASSERT(((ggml_cpu_has_sve() && (svcntw() == 8)) || ggml_cpu_has_matmul_int8()) &&
2149+
GGML_ASSERT(((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) || ggml_cpu_has_matmul_int8()) &&
21502150
"__ARM_FEATURE_SVE for vector size of 256-bits and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 "
21512151
"quantization format for optimal performance");
21522152
}

ggml/src/ggml-impl.h

+1
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ extern "C" {
143143

144144
#if defined(__ARM_FEATURE_SVE)
145145
#include <arm_sve.h>
146+
#include <sys/prctl.h>
146147
#endif
147148

148149
// 16-bit float

ggml/src/ggml-quants.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -3818,7 +3818,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
38183818
float sumf = 0;
38193819

38203820
#if defined(__ARM_FEATURE_SVE)
3821-
if (svcntb() == QK8_0) {
3821+
if (ggml_sve_cnt_b == QK8_0) {
38223822
const svbool_t ptrueh = svptrue_pat_b8(SV_VL16);
38233823
const svbool_t ptruel = svnot_b_z(svptrue_b8(), ptrueh);
38243824

@@ -5303,7 +5303,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r
53035303
float sumf = 0;
53045304

53055305
#if defined(__ARM_FEATURE_SVE)
5306-
if (svcntb() == QK8_0) {
5306+
if (ggml_sve_cnt_b == QK8_0) {
53075307
svfloat32_t sumv0 = svdup_n_f32(0.0f);
53085308
svfloat32_t sumv1 = svdup_n_f32(0.0f);
53095309

ggml/src/ggml-quants.h

+4
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,10 @@ void iq2xs_free_impl(enum ggml_type type);
127127
void iq3xs_init_impl(int grid_size);
128128
void iq3xs_free_impl(int grid_size);
129129

130+
#if defined(__ARM_FEATURE_SVE)
131+
extern int ggml_sve_cnt_b;
132+
#endif
133+
130134
#ifdef __cplusplus
131135
}
132136
#endif

ggml/src/ggml.c

+9
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@
3737
#include <unistd.h>
3838
#endif
3939

40+
#if defined(__ARM_FEATURE_SVE)
41+
int ggml_sve_cnt_b = 0;
42+
#endif
4043
#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
4144
#undef GGML_USE_LLAMAFILE
4245
#endif
@@ -3561,6 +3564,12 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
35613564

35623565
GGML_ASSERT_ALIGNED(ctx->mem_buffer);
35633566

3567+
#if defined(__ARM_FEATURE_SVE)
3568+
if (!ggml_sve_cnt_b) {
3569+
ggml_sve_cnt_b = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
3570+
}
3571+
#endif
3572+
35643573
GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
35653574

35663575
ggml_critical_section_end();

0 commit comments

Comments
 (0)