@@ -386,8 +386,8 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
386
386
UNUSED (blocklen );
387
387
388
388
#if defined(__ARM_FEATURE_SVE )
389
- if (svcntw () == 8 ) {
390
- GGML_ASSERT (!(ggml_cpu_has_sve () && (svcntw () == 8 )) &&
389
+ if (ggml_sve_cnt_b == QK8_0 ) {
390
+ GGML_ASSERT (!(ggml_cpu_has_sve () && (ggml_sve_cnt_b == QK8_0 )) &&
391
391
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance" );
392
392
}
393
393
#endif
@@ -498,8 +498,8 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
498
498
UNUSED (blocklen );
499
499
500
500
#if defined(__ARM_FEATURE_SVE )
501
- if (svcntw () == 8 ) {
502
- GGML_ASSERT (!(ggml_cpu_has_sve () && (svcntw () == 8 )) &&
501
+ if (ggml_sve_cnt_b == QK8_0 ) {
502
+ GGML_ASSERT (!(ggml_cpu_has_sve () && (ggml_sve_cnt_b == QK8_0 )) &&
503
503
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance" );
504
504
}
505
505
#endif
@@ -616,7 +616,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
616
616
UNUSED (blocklen );
617
617
618
618
#if defined(__ARM_FEATURE_SVE ) && ! ((defined(_MSC_VER )) && ! defined(__clang__ ))
619
- if (svcntw () == 8 ) {
619
+ if (ggml_sve_cnt_b == QK8_0 ) {
620
620
const void * b_ptr = vx ;
621
621
const void * a_ptr = vy ;
622
622
float * res_ptr = s ;
@@ -682,12 +682,12 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
682
682
return ;
683
683
}
684
684
else if (ggml_cpu_has_neon () && ggml_cpu_has_matmul_int8 ()) {
685
- GGML_ASSERT ((ggml_cpu_has_sve () && (svcntw () == 8 )) &&
685
+ GGML_ASSERT ((ggml_cpu_has_sve () && (ggml_sve_cnt_b == QK8_0 )) &&
686
686
"__ARM_FEATURE_SVE for vector size of 256-bits not defined, use the Q4_0_4_8 quantization format for optimal "
687
687
"performance" );
688
688
}
689
689
else if (ggml_cpu_has_neon ()) {
690
- GGML_ASSERT (((ggml_cpu_has_sve () && (svcntw () == 8 )) || ggml_cpu_has_matmul_int8 ()) &&
690
+ GGML_ASSERT (((ggml_cpu_has_sve () && (ggml_sve_cnt_b == QK8_0 )) || ggml_cpu_has_matmul_int8 ()) &&
691
691
"__ARM_FEATURE_SVE for vector size of 256-bits and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 "
692
692
"quantization format for optimal performance" );
693
693
}
@@ -747,8 +747,8 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
747
747
UNUSED (blocklen );
748
748
749
749
#if defined(__ARM_FEATURE_SVE ) && defined(__ARM_FEATURE_MATMUL_INT8 )
750
- if (svcntw () == 8 ) {
751
- GGML_ASSERT (!(ggml_cpu_has_sve () && (svcntw () == 8 )) &&
750
+ if (ggml_sve_cnt_b == QK8_0 ) {
751
+ GGML_ASSERT (!(ggml_cpu_has_sve () && (ggml_sve_cnt_b == QK8_0 )) &&
752
752
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance" );
753
753
}
754
754
#endif
@@ -1268,8 +1268,8 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
1268
1268
UNUSED (blocklen );
1269
1269
1270
1270
#if defined(__ARM_FEATURE_SVE ) && defined(__ARM_FEATURE_MATMUL_INT8 )
1271
- if (svcntw () == 8 ) {
1272
- GGML_ASSERT (!(ggml_cpu_has_sve () && (svcntw () == 8 )) &&
1271
+ if (ggml_sve_cnt_b == QK8_0 ) {
1272
+ GGML_ASSERT (!(ggml_cpu_has_sve () && (ggml_sve_cnt_b == QK8_0 )) &&
1273
1273
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance" );
1274
1274
}
1275
1275
#endif
@@ -1730,7 +1730,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
1730
1730
UNUSED (blocklen );
1731
1731
1732
1732
#if defined(__ARM_FEATURE_SVE ) && defined(__ARM_FEATURE_MATMUL_INT8 ) && ! ((defined(_MSC_VER )) && ! defined(__clang__ ))
1733
- if (svcntw () == 8 ) {
1733
+ if (ggml_sve_cnt_b == QK8_0 ) {
1734
1734
const void * b_ptr = vx ;
1735
1735
const void * a_ptr = vy ;
1736
1736
float * res_ptr = s ;
@@ -2141,12 +2141,12 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
2141
2141
return ;
2142
2142
}
2143
2143
else if (ggml_cpu_has_neon () && ggml_cpu_has_matmul_int8 ()) {
2144
- GGML_ASSERT ((ggml_cpu_has_sve () && (svcntw () == 8 )) &&
2144
+ GGML_ASSERT ((ggml_cpu_has_sve () && (ggml_sve_cnt_b == QK8_0 )) &&
2145
2145
"__ARM_FEATURE_SVE for vector size of 256-bits not defined, use the Q4_0_4_8 quantization format for optimal "
2146
2146
"performance" );
2147
2147
}
2148
2148
else if (ggml_cpu_has_neon ()) {
2149
- GGML_ASSERT (((ggml_cpu_has_sve () && (svcntw () == 8 )) || ggml_cpu_has_matmul_int8 ()) &&
2149
+ GGML_ASSERT (((ggml_cpu_has_sve () && (ggml_sve_cnt_b == QK8_0 )) || ggml_cpu_has_matmul_int8 ()) &&
2150
2150
"__ARM_FEATURE_SVE for vector size of 256-bits and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 "
2151
2151
"quantization format for optimal performance" );
2152
2152
}
0 commit comments