Skip to content

Commit 9fd995b

Browse files
authored
Merge pull request #190 from sterrettm2/fp16_swizzle_fix
Change 16-bit swizzle from vector to C arrays
2 parents 935402a + dc80d20 commit 9fd995b

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

src/avx512-16bit-common.h

+8 -8
Original file line number | Diff line number | Diff line change
@@ -14,11 +14,11 @@ struct avx512_16bit_swizzle_ops {
1414
__m512i v = vtype::cast_to(reg);
1515

1616
if constexpr (scale == 2) {
17-
std::vector<uint16_t> arr
17+
constexpr static uint16_t arr[]
1818
= {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11,
1919
10, 13, 12, 15, 14, 17, 16, 19, 18, 21, 20,
2020
23, 22, 25, 24, 27, 26, 29, 28, 31, 30};
21-
__m512i mask = _mm512_loadu_si512(arr.data());
21+
__m512i mask = _mm512_loadu_si512(arr);
2222
v = _mm512_permutexvar_epi16(mask, v);
2323
}
2424
else if constexpr (scale == 4) {
@@ -48,27 +48,27 @@ struct avx512_16bit_swizzle_ops {
4848

4949
if constexpr (scale == 2) { return swap_n<vtype, 2>(reg); }
5050
else if constexpr (scale == 4) {
51-
std::vector<uint16_t> arr
51+
constexpr static uint16_t arr[]
5252
= {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9,
5353
8, 15, 14, 13, 12, 19, 18, 17, 16, 23, 22,
5454
21, 20, 27, 26, 25, 24, 31, 30, 29, 28};
55-
__m512i mask = _mm512_loadu_si512(arr.data());
55+
__m512i mask = _mm512_loadu_si512(arr);
5656
v = _mm512_permutexvar_epi16(mask, v);
5757
}
5858
else if constexpr (scale == 8) {
59-
std::vector<uint16_t> arr
59+
constexpr static int16_t arr[]
6060
= {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13,
6161
12, 11, 10, 9, 8, 23, 22, 21, 20, 19, 18,
6262
17, 16, 31, 30, 29, 28, 27, 26, 25, 24};
63-
__m512i mask = _mm512_loadu_si512(arr.data());
63+
__m512i mask = _mm512_loadu_si512(arr);
6464
v = _mm512_permutexvar_epi16(mask, v);
6565
}
6666
else if constexpr (scale == 16) {
67-
std::vector<uint16_t> arr
67+
constexpr static uint16_t arr[]
6868
= {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5,
6969
4, 3, 2, 1, 0, 31, 30, 29, 28, 27, 26,
7070
25, 24, 23, 22, 21, 20, 19, 18, 17, 16};
71-
__m512i mask = _mm512_loadu_si512(arr.data());
71+
__m512i mask = _mm512_loadu_si512(arr);
7272
v = _mm512_permutexvar_epi16(mask, v);
7373
}
7474
else if constexpr (scale == 32) {

0 commit comments

Comments
 (0)