@@ -68,12 +68,17 @@ struct avx2_vector<int64_t> {
68
68
{
69
69
return _mm256_set1_epi64x (type_max ());
70
70
} // TODO: this should broadcast bits as is?
71
+ static opmask_t knot_opmask (opmask_t x)
72
+ {
73
+ auto allTrue = _mm256_set1_epi64x (0xFFFF'FFFF'FFFF'FFFF );
74
+ return _mm256_xor_si256 (x, allTrue);
75
+ }
71
76
// Builds the mask used for a partial load: an integer whose low
// num_to_read bits are set is passed to convert_int_to_avx2_mask_64bit.
// The shift is only well defined for num_to_read < 64.
static opmask_t get_partial_loadmask(uint64_t num_to_read)
{
    unsigned long long low_bits = 0x1ull << num_to_read;
    low_bits -= 0x1ull;
    return convert_int_to_avx2_mask_64bit(low_bits);
}
76
- static ymmi_t seti (int v1, int v2, int v3, int v4)
81
+ static ymmi_t seti (int64_t v1, int64_t v2, int64_t v3, int64_t v4)
77
82
{
78
83
return _mm256_set_epi64x (v1, v2, v3, v4);
79
84
}
@@ -209,6 +214,9 @@ struct avx2_vector<int64_t> {
209
214
{
210
215
return v;
211
216
}
217
+ static bool all_false (opmask_t k){
218
+ return _mm256_movemask_pd (_mm256_castsi256_pd (k)) == 0 ;
219
+ }
212
220
};
213
221
template <>
214
222
struct avx2_vector <uint64_t > {
@@ -239,12 +247,17 @@ struct avx2_vector<uint64_t> {
239
247
{
240
248
return _mm256_set1_epi64x (type_max ());
241
249
}
250
+ static opmask_t knot_opmask (opmask_t x)
251
+ {
252
+ auto allTrue = _mm256_set1_epi64x (0xFFFF'FFFF'FFFF'FFFF );
253
+ return _mm256_xor_si256 (x, allTrue);
254
+ }
242
255
// Builds the mask used for a partial load: an integer whose low
// num_to_read bits are set is passed to convert_int_to_avx2_mask_64bit.
// The shift is only well defined for num_to_read < 64.
static opmask_t get_partial_loadmask(uint64_t num_to_read)
{
    unsigned long long low_bits = 0x1ull << num_to_read;
    low_bits -= 0x1ull;
    return convert_int_to_avx2_mask_64bit(low_bits);
}
247
- static ymmi_t seti (int v1, int v2, int v3, int v4)
260
+ static ymmi_t seti (int64_t v1, int64_t v2, int64_t v3, int64_t v4)
248
261
{
249
262
return _mm256_set_epi64x (v1, v2, v3, v4);
250
263
}
@@ -378,6 +391,9 @@ struct avx2_vector<uint64_t> {
378
391
{
379
392
return v;
380
393
}
394
+ static bool all_false (opmask_t k){
395
+ return _mm256_movemask_pd (_mm256_castsi256_pd (k)) == 0 ;
396
+ }
381
397
};
382
398
383
399
/*
@@ -421,6 +437,11 @@ struct avx2_vector<double> {
421
437
{
422
438
return _mm256_set1_pd (type_max ());
423
439
}
440
+ static opmask_t knot_opmask (opmask_t x)
441
+ {
442
+ auto allTrue = _mm256_set1_epi64x (0xFFFF'FFFF'FFFF'FFFF );
443
+ return _mm256_xor_si256 (x, allTrue);
444
+ }
424
445
static opmask_t get_partial_loadmask (uint64_t num_to_read)
425
446
{
426
447
auto mask = ((0x1ull << num_to_read) - 0x1ull );
@@ -440,7 +461,7 @@ struct avx2_vector<double> {
440
461
static_assert (type == (0x01 | 0x80 ), " should not reach here" );
441
462
}
442
463
}
443
- static ymmi_t seti (int v1, int v2, int v3, int v4)
464
+ static ymmi_t seti (int64_t v1, int64_t v2, int64_t v3, int64_t v4)
444
465
{
445
466
return _mm256_set_epi64x (v1, v2, v3, v4);
446
467
}
@@ -571,6 +592,9 @@ struct avx2_vector<double> {
571
592
{
572
593
return _mm256_castpd_si256 (v);
573
594
}
595
+ static bool all_false (opmask_t k){
596
+ return _mm256_movemask_pd (_mm256_castsi256_pd (k)) == 0 ;
597
+ }
574
598
};
575
599
576
600
struct avx2_64bit_swizzle_ops {
0 commit comments