@@ -129,45 +129,72 @@ namespace x86simdsort {
129
129
} \
130
130
}
131
131
132
- #define DISPATCH_KEYVALUE_SORT (TYPE1, TYPE2, ISA ) \
133
- static void (CAT(CAT(*internal_kv_qsort_, TYPE1), TYPE2))( \
132
+ #define ISA_LIST (...) \
133
+ std::initializer_list<std::string_view> \
134
+ { \
135
+ __VA_ARGS__ \
136
+ }
137
+
138
+ #ifdef __FLT16_MAX__
139
+ DISPATCH (qsort, _Float16, ISA_LIST(" avx512_spr" ))
140
+ DISPATCH(qselect, _Float16, ISA_LIST(" avx512_spr" ))
141
+ DISPATCH(partial_qsort, _Float16, ISA_LIST(" avx512_spr" ))
142
+ DISPATCH(argsort, _Float16, ISA_LIST(" none" ))
143
+ DISPATCH(argselect, _Float16, ISA_LIST(" none" ))
144
+ #endif
145
+
146
+ #define DISPATCH_ALL (func, ISA_16BIT, ISA_32BIT, ISA_64BIT ) \
147
+ DISPATCH (func, uint16_t , ISA_16BIT) \
148
+ DISPATCH (func, int16_t , ISA_16BIT) \
149
+ DISPATCH (func, float , ISA_32BIT) \
150
+ DISPATCH (func, int32_t , ISA_32BIT) \
151
+ DISPATCH (func, uint32_t , ISA_32BIT) \
152
+ DISPATCH (func, int64_t , ISA_64BIT) \
153
+ DISPATCH (func, uint64_t , ISA_64BIT) \
154
+ DISPATCH (func, double , ISA_64BIT)
155
+
156
+ DISPATCH_ALL (qsort,
157
+ (ISA_LIST(" avx512_icl" )),
158
+ (ISA_LIST(" avx512_skx" , " avx2" )),
159
+ (ISA_LIST(" avx512_skx" , " avx2" )))
160
+ DISPATCH_ALL (qselect,
161
+ (ISA_LIST(" avx512_icl" )),
162
+ (ISA_LIST(" avx512_skx" , " avx2" )),
163
+ (ISA_LIST(" avx512_skx" , " avx2" )))
164
+ DISPATCH_ALL (partial_qsort,
165
+ (ISA_LIST(" avx512_icl" )),
166
+ (ISA_LIST(" avx512_skx" , " avx2" )),
167
+ (ISA_LIST(" avx512_skx" , " avx2" )))
168
+ DISPATCH_ALL (argsort,
169
+ (ISA_LIST(" none" )),
170
+ (ISA_LIST(" avx512_skx" , " avx2" )),
171
+ (ISA_LIST(" avx512_skx" , " avx2" )))
172
+ DISPATCH_ALL (argselect,
173
+ (ISA_LIST(" none" )),
174
+ (ISA_LIST(" avx512_skx" , " avx2" )),
175
+ (ISA_LIST(" avx512_skx" , " avx2" )))
176
+
177
+ /* Key-Value methods */
178
+ #define DECLARE_ALL_KEYVALUE_METHODS (TYPE1, TYPE2 ) \
179
+ static void (CAT(CAT(*internal_keyvalue_qsort_, TYPE1), TYPE2))( \
134
180
TYPE1 *, TYPE2 *, size_t , bool , bool ) \
135
181
= NULL; \
182
+ static void (CAT(CAT(*internal_keyvalue_select_, TYPE1), TYPE2))( \
183
+ TYPE1 *, TYPE2 *, size_t , size_t , bool , bool ) \
184
+ = NULL; \
185
+ static void (CAT(CAT(*internal_keyvalue_partial_sort_, TYPE1), TYPE2))( \
186
+ TYPE1 *, TYPE2 *, size_t , size_t , bool , bool ) \
187
+ = NULL; \
136
188
template <> \
137
189
void keyvalue_qsort (TYPE1 *key, \
138
190
TYPE2 *val, \
139
191
size_t arrsize, \
140
192
bool hasnan, \
141
193
bool descending) \
142
194
{ \
143
- (CAT (CAT (*internal_kv_qsort_ , TYPE1), TYPE2))( \
195
+ (CAT (CAT (*internal_keyvalue_qsort_ , TYPE1), TYPE2))( \
144
196
key, val, arrsize, hasnan, descending); \
145
197
} \
146
- static __attribute__ ((constructor)) void CAT( \
147
- CAT (resolve_keyvalue_qsort_, TYPE1), TYPE2)(void ) \
148
- { \
149
- CAT (CAT (internal_kv_qsort_, TYPE1), TYPE2) \
150
- = &xss::scalar::keyvalue_qsort<TYPE1, TYPE2>; \
151
- __builtin_cpu_init (); \
152
- std::string_view preferred_cpu = find_preferred_cpu (ISA); \
153
- if constexpr (dispatch_requested (" avx512" , ISA)) { \
154
- if (preferred_cpu.find (" avx512" ) != std::string_view::npos) { \
155
- CAT (CAT (internal_kv_qsort_, TYPE1), TYPE2) \
156
- = &xss::avx512::keyvalue_qsort<TYPE1, TYPE2>; \
157
- return ; \
158
- } \
159
- } \
160
- if constexpr (dispatch_requested (" avx2" , ISA)) { \
161
- if (preferred_cpu.find (" avx2" ) != std::string_view::npos) { \
162
- CAT (CAT (internal_kv_qsort_, TYPE1), TYPE2) \
163
- = &xss::avx2::keyvalue_qsort<TYPE1, TYPE2>; \
164
- return ; \
165
- } \
166
- } \
167
- } \
168
- static void (CAT(CAT(*internal_kv_select_, TYPE1), TYPE2))( \
169
- TYPE1 *, TYPE2 *, size_t , size_t , bool , bool ) \
170
- = NULL; \
171
198
template <> \
172
199
void keyvalue_select (TYPE1 *key, \
173
200
TYPE2 *val, \
@@ -176,34 +203,9 @@ namespace x86simdsort {
176
203
bool hasnan, \
177
204
bool descending) \
178
205
{ \
179
- (CAT (CAT (*internal_kv_select_ , TYPE1), TYPE2))( \
206
+ (CAT (CAT (*internal_keyvalue_select_ , TYPE1), TYPE2))( \
180
207
key, val, k, arrsize, hasnan, descending); \
181
208
} \
182
- static __attribute__ ((constructor)) void CAT( \
183
- CAT (resolve_keyvalue_select_, TYPE1), TYPE2)(void ) \
184
- { \
185
- CAT (CAT (internal_kv_select_, TYPE1), TYPE2) \
186
- = &xss::scalar::keyvalue_select<TYPE1, TYPE2>; \
187
- __builtin_cpu_init (); \
188
- std::string_view preferred_cpu = find_preferred_cpu (ISA); \
189
- if constexpr (dispatch_requested (" avx512" , ISA)) { \
190
- if (preferred_cpu.find (" avx512" ) != std::string_view::npos) { \
191
- CAT (CAT (internal_kv_select_, TYPE1), TYPE2) \
192
- = &xss::avx512::keyvalue_select<TYPE1, TYPE2>; \
193
- return ; \
194
- } \
195
- } \
196
- if constexpr (dispatch_requested (" avx2" , ISA)) { \
197
- if (preferred_cpu.find (" avx2" ) != std::string_view::npos) { \
198
- CAT (CAT (internal_kv_select_, TYPE1), TYPE2) \
199
- = &xss::avx2::keyvalue_select<TYPE1, TYPE2>; \
200
- return ; \
201
- } \
202
- } \
203
- } \
204
- static void (CAT(CAT(*internal_kv_partial_sort_, TYPE1), TYPE2))( \
205
- TYPE1 *, TYPE2 *, size_t , size_t , bool , bool ) \
206
- = NULL; \
207
209
template <> \
208
210
void keyvalue_partial_sort (TYPE1 *key, \
209
211
TYPE2 *val, \
@@ -212,76 +214,39 @@ namespace x86simdsort {
212
214
bool hasnan, \
213
215
bool descending) \
214
216
{ \
215
- (CAT (CAT (*internal_kv_partial_sort_ , TYPE1), TYPE2))( \
217
+ (CAT (CAT (*internal_keyvalue_partial_sort_ , TYPE1), TYPE2))( \
216
218
key, val, k, arrsize, hasnan, descending); \
217
- } \
219
+ }
220
+
221
+ #define DISPATCH_KV_FUNC (func, TYPE1, TYPE2, ISA ) \
218
222
static __attribute__ ((constructor)) void CAT( \
219
- CAT (resolve_keyvalue_partial_sort_ , TYPE1), TYPE2)(void ) \
223
+ CAT (CAT(CAT(resolve_, func), _) , TYPE1), TYPE2)(void ) \
220
224
{ \
221
- CAT (CAT (internal_kv_partial_sort_ , TYPE1), TYPE2) \
222
- = &xss::scalar::keyvalue_partial_sort <TYPE1, TYPE2>; \
225
+ CAT (CAT (CAT ( CAT (internal_, func), _) , TYPE1), TYPE2) \
226
+ = &xss::scalar::func <TYPE1, TYPE2>; \
223
227
__builtin_cpu_init (); \
224
228
std::string_view preferred_cpu = find_preferred_cpu (ISA); \
225
229
if constexpr (dispatch_requested (" avx512" , ISA)) { \
226
230
if (preferred_cpu.find (" avx512" ) != std::string_view::npos) { \
227
- CAT (CAT (internal_kv_partial_sort_ , TYPE1), TYPE2) \
228
- = &xss::avx512::keyvalue_partial_sort <TYPE1, TYPE2>; \
231
+ CAT (CAT (CAT ( CAT (internal_, func), _) , TYPE1), TYPE2) \
232
+ = &xss::avx512::func <TYPE1, TYPE2>; \
229
233
return ; \
230
234
} \
231
235
} \
232
236
if constexpr (dispatch_requested (" avx2" , ISA)) { \
233
237
if (preferred_cpu.find (" avx2" ) != std::string_view::npos) { \
234
- CAT (CAT (internal_kv_partial_sort_ , TYPE1), TYPE2) \
235
- = &xss::avx2::keyvalue_partial_sort <TYPE1, TYPE2>; \
238
+ CAT (CAT (CAT ( CAT (internal_, func), _) , TYPE1), TYPE2) \
239
+ = &xss::avx2::func <TYPE1, TYPE2>; \
236
240
return ; \
237
241
} \
238
242
} \
239
243
}
240
244
241
- #define ISA_LIST (...) \
242
- std::initializer_list<std::string_view> \
243
- { \
244
- __VA_ARGS__ \
245
- }
246
-
247
- #ifdef __FLT16_MAX__
248
- DISPATCH (qsort, _Float16, ISA_LIST(" avx512_spr" ))
249
- DISPATCH(qselect, _Float16, ISA_LIST(" avx512_spr" ))
250
- DISPATCH(partial_qsort, _Float16, ISA_LIST(" avx512_spr" ))
251
- DISPATCH(argsort, _Float16, ISA_LIST(" none" ))
252
- DISPATCH(argselect, _Float16, ISA_LIST(" none" ))
253
- #endif
254
-
255
- #define DISPATCH_ALL (func, ISA_16BIT, ISA_32BIT, ISA_64BIT ) \
256
- DISPATCH (func, uint16_t , ISA_16BIT) \
257
- DISPATCH(func, int16_t , ISA_16BIT) \
258
- DISPATCH(func, float , ISA_32BIT) \
259
- DISPATCH(func, int32_t , ISA_32BIT) \
260
- DISPATCH(func, uint32_t , ISA_32BIT) \
261
- DISPATCH(func, int64_t , ISA_64BIT) \
262
- DISPATCH(func, uint64_t , ISA_64BIT) \
263
- DISPATCH(func, double , ISA_64BIT)
264
-
265
- DISPATCH_ALL(qsort,
266
- (ISA_LIST(" avx512_icl" )),
267
- (ISA_LIST(" avx512_skx" , " avx2" )),
268
- (ISA_LIST(" avx512_skx" , " avx2" )))
269
- DISPATCH_ALL(qselect,
270
- (ISA_LIST(" avx512_icl" )),
271
- (ISA_LIST(" avx512_skx" , " avx2" )),
272
- (ISA_LIST(" avx512_skx" , " avx2" )))
273
- DISPATCH_ALL(partial_qsort,
274
- (ISA_LIST(" avx512_icl" )),
275
- (ISA_LIST(" avx512_skx" , " avx2" )),
276
- (ISA_LIST(" avx512_skx" , " avx2" )))
277
- DISPATCH_ALL(argsort,
278
- (ISA_LIST(" none" )),
279
- (ISA_LIST(" avx512_skx" , " avx2" )),
280
- (ISA_LIST(" avx512_skx" , " avx2" )))
281
- DISPATCH_ALL(argselect,
282
- (ISA_LIST(" none" )),
283
- (ISA_LIST(" avx512_skx" , " avx2" )),
284
- (ISA_LIST(" avx512_skx" , " avx2" )))
245
+ #define DISPATCH_KEYVALUE_SORT (TYPE1, TYPE2, ISA ) \
246
+ DECLARE_ALL_KEYVALUE_METHODS (TYPE1, TYPE2) \
247
+ DISPATCH_KV_FUNC(keyvalue_qsort, TYPE1, TYPE2, ISA) \
248
+ DISPATCH_KV_FUNC(keyvalue_select, TYPE1, TYPE2, ISA) \
249
+ DISPATCH_KV_FUNC(keyvalue_partial_sort, TYPE1, TYPE2, ISA)
285
250
286
251
#define DISPATCH_KEYVALUE_SORT_FORTYPE (type ) \
287
252
DISPATCH_KEYVALUE_SORT (type, uint64_t , (ISA_LIST(" avx512_skx" , " avx2" ))) \
0 commit comments