@@ -1,7 +1,6 @@
 #include <ggml.h>
 #include <ggml-alloc.h>
 #include <ggml-backend.h>
-#include <ggml-backend-impl.h>

 #include <algorithm>
 #include <array>
@@ -80,14 +79,22 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
                 im = nullptr;
             }
         }
+
         ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], im);
         GGML_ASSERT(ggml_validate_row_data(tensor->type, dataq.data(), dataq.size()));
+        // TODO: other cases
+        //#pragma omp parallel for
+        //for (int i = 0; i < tensor->ne[1]; i++) {
+        //    ggml_quantize_chunk(tensor->type, data.data(), dataq.data(),
+        //                        i * tensor->ne[0], 1, tensor->ne[0], im);
+        //}
+
         ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
     } else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
         // This is going to create some weird integers though.
         ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

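Note: ggml_quantize_chunk(type, src, dst, start, nrows, n_per_row, imatrix) converts nrows rows of n_per_row floats, beginning at element offset start, into the block-quantized layout of the target type, so the call above quantizes the whole tensor in one shot. A minimal sketch of the per-row split that the TODO comment gestures at (same real API; the loop bound simply enumerates all rows of the 4-D tensor, which is what size/ne[0] equals in the one-shot call):

    // hypothetical per-row variant, equivalent to the single call above
    for (int64_t r = 0; r < tensor->ne[1]*tensor->ne[2]*tensor->ne[3]; r++) {
        ggml_quantize_chunk(tensor->type, data.data(), dataq.data(),
                            r * tensor->ne[0], /*nrows=*/1, tensor->ne[0], im);
    }
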
@@ -125,7 +132,7 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
                     tt.to_float(&buf[i], vq.data(), bs);
                     tv.insert(tv.end(), vq.begin(), vq.end());
                 } else {
-                    GGML_ASSERT(false);
+                    GGML_ABORT("fatal error");
                 }
             }
         }
@@ -760,7 +767,7 @@ struct test_dup : public test_case {
     }

     test_dup(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne = {10, 10, 10, 1},
+            std::array<int64_t, 4> ne = {10, 10, 20, 1},
             std::array<int64_t, 4> permute = {0, 0, 0, 0})
         : type(type), ne(ne), permute(permute),
             _use_permute(permute[0] + permute[1] + permute[2] + permute[3] > 0) {}
@@ -780,9 +787,11 @@ struct test_cpy : public test_case {
     const ggml_type type_src;
     const ggml_type type_dst;
     const std::array<int64_t, 4> ne;
+    const std::array<int64_t, 4> permute;
+    bool _src_use_permute;

     std::string vars() override {
-        return VARS_TO_STR3(type_src, type_dst, ne);
+        return VARS_TO_STR4(type_src, type_dst, ne, permute);
     }

     double max_nmse_err() override {
@@ -794,12 +803,17 @@ struct test_cpy : public test_case {
     }

     test_cpy(ggml_type type_src = GGML_TYPE_F32, ggml_type type_dst = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne = {10, 10, 10, 1})
-        : type_src(type_src), type_dst(type_dst), ne(ne) {}
+            std::array<int64_t, 4> ne = {10, 10, 10, 1},
+            std::array<int64_t, 4> permute = {0, 0, 0, 0})
+        : type_src(type_src), type_dst(type_dst), ne(ne), permute(permute),
+            _src_use_permute(permute[0] + permute[1] + permute[2] + permute[3] > 0) {}

     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
-        ggml_tensor * dst = ggml_new_tensor(ctx, type_dst, 4, ne.data());
+        if (_src_use_permute) {
+            src = ggml_permute(ctx, src, permute[0], permute[1], permute[2], permute[3]);
+        }
+        ggml_tensor * dst = ggml_new_tensor(ctx, type_dst, 4, src->ne);
         ggml_tensor * out = ggml_cpy(ctx, src, dst);
         return out;
     }
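Note: ggml_permute only rewrites the view metadata (ne/nb); it does not move data. After the permute, src is a non-contiguous view, which is why dst is now created with the permuted shape src->ne rather than the original ne. A minimal illustration of the {0, 2, 1, 3} case (shapes assumed for the example):

    // swaps axes 1 and 2: ne {256, 2, 3, 4} -> {256, 3, 2, 4}
    ggml_tensor * t = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 256, 2, 3, 4);
    ggml_tensor * v = ggml_permute(ctx, t, 0, 2, 1, 3);
    GGML_ASSERT(!ggml_is_contiguous(v)); // forces ggml_cpy to gather rows
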
@@ -1175,6 +1189,7 @@ struct test_soft_max : public test_case {
     }
 };

+
 // GGML_OP_ROPE
 struct test_rope : public test_case {
     const ggml_type type;
@@ -1267,6 +1282,32 @@ struct test_pool2d : public test_case {
     }
 };

+// GGML_OP_CONV_TRANSPOSE_1D
+struct test_conv_transpose_1d : public test_case {
+    const std::array<int64_t, 4> ne_input;
+    const std::array<int64_t, 4> ne_kernel;
+
+    const int s0; // stride
+    const int p0; // padding
+    const int d0; // dilation
+
+    std::string vars() override {
+        return VARS_TO_STR5(ne_input, ne_kernel, s0, p0, d0);
+    }
+
+    test_conv_transpose_1d(std::array<int64_t, 4> ne_input = {197, 32, 1, 1}, // [input_width, input_height, input_channels, 1]
+                           std::array<int64_t, 4> ne_kernel = {16, 32, 32, 1}, // [kernel_width, kernel_height, input_channels, 1]
+                           int s0 = 1, int p0 = 0, int d0 = 1)
+        : ne_input(ne_input), ne_kernel(ne_kernel), s0(s0), p0(p0), d0(d0) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * input = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data());
+        ggml_tensor * kernel = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_kernel.data());
+        ggml_tensor * out = ggml_conv_transpose_1d(ctx, kernel, input, s0, p0, d0);
+        return out;
+    }
+};
+
 // GGML_OP_IM2COL
 struct test_im2col : public test_case {
     const ggml_type type_input;
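Note: for a transposed 1-D convolution the output width follows the standard relation out_w = (in_w - 1)*s0 - 2*p0 + d0*(k_w - 1) + 1, which is a quick way to sanity-check the shapes in these tests. A small helper sketch (illustrative names, not part of the test file):

    static int64_t conv_transpose_1d_out_w(int64_t in_w, int64_t k_w,
                                           int s0, int p0, int d0) {
        return (in_w - 1)*s0 - 2*p0 + d0*(k_w - 1) + 1;
    }
    // default case above: in_w = 197, k_w = 16, s0 = 1, p0 = 0, d0 = 1
    // -> (197 - 1)*1 - 0 + 1*15 + 1 = 212
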
@@ -1280,7 +1321,7 @@ struct test_im2col : public test_case {
     // padding
     const int p0;
     const int p1;
-    // dilatation
+    // dilation
     const int d0;
     const int d1;
     // mode
@@ -1393,7 +1434,7 @@ struct test_argsort : public test_case {
                 ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(float));
             }
         } else {
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
     }
 }
@@ -1470,19 +1511,21 @@ struct test_group_norm : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
     const int32_t num_groups;
+    const float eps;

     std::string vars() override {
         return VARS_TO_STR3(type, ne, num_groups);
     }

     test_group_norm(ggml_type type = GGML_TYPE_F32,
             std::array<int64_t, 4> ne = {64, 64, 320, 1},
-            int32_t num_groups = 32)
-        : type(type), ne(ne), num_groups(num_groups) {}
+            int32_t num_groups = 32,
+            float eps = 1e-6f)
+        : type(type), ne(ne), num_groups(num_groups), eps(eps) {}

     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_tensor * out = ggml_group_norm(ctx, a, num_groups);
+        ggml_tensor * out = ggml_group_norm(ctx, a, num_groups, eps);
         return out;
     }
 };
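Note: eps is the usual numerical-stability term added to the variance before the square root, so near-constant groups do not divide by zero. A reference sketch of one group's normalization (illustrative only, not ggml's actual kernel; needs <cmath>):

    static void group_norm_ref(float * x, int64_t n, float eps) {
        double mean = 0.0, var = 0.0;
        for (int64_t i = 0; i < n; i++) mean += x[i];
        mean /= n;
        for (int64_t i = 0; i < n; i++) var += (x[i] - mean)*(x[i] - mean);
        var /= n;
        const float inv = 1.0f/std::sqrt((float) var + eps);
        for (int64_t i = 0; i < n; i++) x[i] = (x[i] - (float) mean)*inv;
    }
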
@@ -2053,6 +2096,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S,
         GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M,
         GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S, GGML_TYPE_IQ4_XS,
+        GGML_TYPE_BF16,
     };

     // unary ops
@@ -2097,6 +2141,19 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op

     test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32));
     test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16));
+    // test cases for 1D im2col
+    test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
+    test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
+
+    test_cases.emplace_back(new test_conv_transpose_1d());
+    test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 3, 0, 1));
+    test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 2, 0, 1));
+    test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 1, 0, 1));
+    test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,2,2,1}, 2, 0, 1));
+    test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,2,2,1}, 1, 0, 1));
+    test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,1,2,1}, 1, 0, 1));
+    test_cases.emplace_back(new test_conv_transpose_1d({2,1,1,1}, {3,1,1,1}, 1, 0, 1));
+

     test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {1, 1, 1, 1}));
     test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {2, 1, 1, 1}));
@@ -2110,12 +2167,22 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     test_cases.emplace_back(new test_dup(GGML_TYPE_F16));
     test_cases.emplace_back(new test_dup(GGML_TYPE_I32));
     test_cases.emplace_back(new test_dup(GGML_TYPE_I16));
+    test_cases.emplace_back(new test_dup(GGML_TYPE_F32, {10, 10, 5, 1}, {0, 2, 1, 3}));
+    test_cases.emplace_back(new test_dup(GGML_TYPE_F16, {10, 10, 5, 1}, {0, 2, 1, 3})); // dup by rows
+    test_cases.emplace_back(new test_dup(GGML_TYPE_F32, {10, 10, 5, 1}, {1, 0, 2, 3}));
+    test_cases.emplace_back(new test_dup(GGML_TYPE_F16, {10, 10, 5, 1}, {1, 0, 2, 3})); // dup dst not-contiguous
     test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {0, 2, 1, 3}));
     test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {1, 2, 0, 3}));

     for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) {
         for (ggml_type type_dst : all_types) {
             test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 4, 4, 4}));
+            test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 2, 3, 4}, {0, 2, 1, 3})); // cpy by rows
+        }
+    }
+    for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) {
+        for (ggml_type type_dst : {GGML_TYPE_F16, GGML_TYPE_F32}) {
+            test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 2, 3, 4}, {1, 0, 2, 3})); // cpy not-contiguous
         }
     }

@@ -2165,6 +2232,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64, 10, 10, 10}, eps));
     }

+#if 1
     for (ggml_type type_a : base_types) {
         for (ggml_type type_b : {GGML_TYPE_F32, GGML_TYPE_F16}) {
             test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 1}));
@@ -2184,10 +2252,31 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
             test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {10, 10}, {2, 2}));
         }
     }
+#else
+    // m = a rows
+    // n = b rows
+    // k = cols
+    std::uniform_int_distribution<> dist_m(1, 128);
+    std::uniform_int_distribution<> dist_n(16, 128);
+    std::uniform_int_distribution<> dist_k(1, 16);
+    for (int i = 0; i < 1000; i++) {
+        for (ggml_type type_a : all_types) {
+            for (ggml_type type_b : {GGML_TYPE_F32}) {
+                int m = dist_m(rng);
+                int n = dist_n(rng);
+                int k = dist_k(rng) * ggml_blck_size(type_a);
+                test_cases.emplace_back(new test_mul_mat(type_a, type_b, m, n, k, {1, 1}, {1, 1}));
+            }
+        }
+    }
+#endif

     for (ggml_type type_a : other_types) {
         for (ggml_type type_b : {GGML_TYPE_F32}) {
-            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 1}));
+            if (ggml_blck_size(type_a) != 256) {
+                test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, ggml_blck_size(type_a), {1, 1}, {1, 1}));
+            }
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 1}));
         }
     }

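Note: ggml_blck_size returns the number of elements per quantization block (e.g. 32 for Q4_0 and IQ4_NL, 256 for the K-quants and most IQ types), and a mat-mul's shared dimension k must be a whole number of blocks. The new guard adds a small-k case only when the block size differs from 256, since otherwise it would just repeat the k = 256 case. A sketch of the constraint:

    const int64_t bs = ggml_blck_size(type_a); // elements per block
    const int64_t k  = 4*bs;                   // any positive multiple of bs is valid
    GGML_ASSERT(k % bs == 0);
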
@@ -2247,7 +2336,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
             for (int n = 0; n < 10; ++n) {
                 int64_t ne0 = dist_ne0(rng);
                 int64_t ne1 = dist_ne1(rng);
-                test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, n/2 == 0, 0.1f, ne0 < 1000 ? 4.0f : 0.0f));
+                test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, GGML_TYPE_F32, {ne0, ne1, 1, 1}, n/2 == 0, 0.1f, ne0 < 1000 ? 4.0f : 0.0f));
             }

             exponent <<= 1;
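Note: these test_soft_max parameters map onto ggml_soft_max_ext(ctx, a, mask, scale, max_bias): the bool toggles an optional mask tensor, 0.1f scales the logits, and a non-zero final argument enables ALiBi-style bias, with the slope derived from max_bias and the row's head index. Per row the op effectively computes the following (reference sketch, illustrative names; needs <cmath> and <algorithm>):

    static void soft_max_row_ref(float * y, const float * x, const float * mask,
                                 int64_t n, float scale, float slope) {
        float maxv = -INFINITY;
        for (int64_t i = 0; i < n; i++) {
            y[i] = scale*x[i] + (mask ? slope*mask[i] : 0.0f);
            maxv = std::max(maxv, y[i]);
        }
        float sum = 0.0f;
        for (int64_t i = 0; i < n; i++) { y[i] = std::exp(y[i] - maxv); sum += y[i]; }
        for (int64_t i = 0; i < n; i++) { y[i] /= sum; }
    }
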
@@ -2266,7 +2355,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
             }
         }
     }
-
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, true, 0.1f, 0.0f));
     test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, false, 0.1f, 0.0f));
     test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 0.0f));
     test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 8.0f));
@@ -2380,7 +2469,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         return true;
     }

-    GGML_ASSERT(false);
+    GGML_ABORT("fatal error");
     return false;
 }
