Skip to content

Commit ec1e41c

Browse files
committed
Remove mulx
1 parent ec789ab commit ec1e41c

File tree

5 files changed

+39
-74
lines changed

5 files changed

+39
-74
lines changed

crypto/cryptonight_aesni.h

+5-20
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,6 @@ static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
2727
*hi = r >> 64;
2828
return (uint64_t)r;
2929
}
30-
31-
__attribute__((target ("bmi2"))) static inline uint64_t _mulx_u64(uint64_t a, uint64_t b, uint64_t* hi)
32-
{
33-
return _mulx_u64((unsigned long long)a, (unsigned long long)b, (unsigned long long*)hi);
34-
}
35-
3630
#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)
3731
#else
3832
#include <intrin.h>
@@ -284,7 +278,7 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output)
284278
_mm_store_si128(output + 11, xout7);
285279
}
286280

287-
template<size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH, bool MULX>
281+
template<size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
288282
void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_ctx* ctx0)
289283
{
290284
keccak((const uint8_t *)input, len, ctx0->hash_state, 200);
@@ -323,10 +317,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
323317
cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0];
324318
ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1];
325319

326-
if(MULX)
327-
lo = _mulx_u64(idx0, cl, &hi);
328-
else
329-
lo = _umul128(idx0, cl, &hi);
320+
lo = _umul128(idx0, cl, &hi);
330321

331322
al0 += hi;
332323
ah0 += lo;
@@ -352,7 +343,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
352343
// This lovely creation will do 2 cn hashes at a time. We have plenty of space on silicon
353344
// to fit temporary vars for two contexts. Function will read len*2 from input and write 64 bytes to output
354345
// We are still limited by L3 cache, so doubling will only work with CPUs that have more than 2MB of L3 cache per core (e.g. Xeons)
355-
template<size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH, bool MULX>
346+
template<size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
356347
void cryptonight_double_hash(const void* input, size_t len, void* output, cryptonight_ctx* __restrict ctx0, cryptonight_ctx* __restrict ctx1)
357348
{
358349
keccak((const uint8_t *)input, len, ctx0->hash_state, 200);
@@ -410,10 +401,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
410401
uint64_t hi, lo;
411402
cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]);
412403

413-
if(MULX)
414-
lo = _mulx_u64(idx0, _mm_cvtsi128_si64(cx), &hi);
415-
else
416-
lo = _umul128(idx0, _mm_cvtsi128_si64(cx), &hi);
404+
lo = _umul128(idx0, _mm_cvtsi128_si64(cx), &hi);
417405

418406
ax0 = _mm_add_epi64(ax0, _mm_set_epi64x(lo, hi));
419407
_mm_store_si128((__m128i*)&l0[idx0 & 0x1FFFF0], ax0);
@@ -425,10 +413,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
425413

426414
cx = _mm_load_si128((__m128i *)&l1[idx1 & 0x1FFFF0]);
427415

428-
if(MULX)
429-
lo = _mulx_u64(idx1, _mm_cvtsi128_si64(cx), &hi);
430-
else
431-
lo = _umul128(idx1, _mm_cvtsi128_si64(cx), &hi);
416+
lo = _umul128(idx1, _mm_cvtsi128_si64(cx), &hi);
432417

433418
ax1 = _mm_add_epi64(ax1, _mm_set_epi64x(lo, hi));
434419
_mm_store_si128((__m128i*)&l1[idx1 & 0x1FFFF0], ax1);

jconf.cpp

-8
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,6 @@ bool jconf::check_cpu_features()
277277
{
278278
constexpr int AESNI_BIT = 1 << 25;
279279
constexpr int SSE2_BIT = 1 << 26;
280-
constexpr int BMI2_BIT = 1 << 8;
281280
int32_t cpu_info[4];
282281
bool bHaveSse2;
283282

@@ -286,10 +285,6 @@ bool jconf::check_cpu_features()
286285
bHaveAes = (cpu_info[2] & AESNI_BIT) != 0;
287286
bHaveSse2 = (cpu_info[3] & SSE2_BIT) != 0;
288287

289-
cpuid(7, 0, cpu_info);
290-
291-
bHaveBmi2 = (cpu_info[1] & BMI2_BIT) != 0;
292-
293288
return bHaveSse2;
294289
}
295290

@@ -462,9 +457,6 @@ bool jconf::parse_config(const char* sFilename)
462457
{
463458
if(!bHaveAes)
464459
printer::inst()->print_msg(L0, "Your CPU doesn't support hardware AES. Don't expect high hashrates.");
465-
466-
if(bHaveBmi2)
467-
printer::inst()->print_msg(L0, "CPU supports BMI2 instructions. Faster multiplication enabled.");
468460
}
469461

470462
return true;

jconf.h

-2
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ class jconf
5757
bool PreferIpv4();
5858

5959
inline bool HaveHardwareAes() { return bHaveAes; }
60-
inline bool HaveMulx() { return bHaveBmi2; }
6160

6261
static void cpuid(uint32_t eax, int32_t ecx, int32_t val[4]);
6362

@@ -70,5 +69,4 @@ class jconf
7069
opaque_private* prv;
7170

7271
bool bHaveAes;
73-
bool bHaveBmi2;
7472
};

minethd.cpp

+32-42
Original file line numberDiff line numberDiff line change
@@ -251,20 +251,20 @@ bool minethd::self_test()
251251
cn_hash_fun hashf;
252252
cn_hash_fun_dbl hashdf;
253253

254-
hashf = func_selector(jconf::inst()->HaveHardwareAes(), false, jconf::inst()->HaveMulx());
254+
hashf = func_selector(jconf::inst()->HaveHardwareAes(), false);
255255
hashf("This is a test", 14, out, ctx0);
256256
bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;
257257

258-
hashf = func_selector(jconf::inst()->HaveHardwareAes(), true, jconf::inst()->HaveMulx());
258+
hashf = func_selector(jconf::inst()->HaveHardwareAes(), true);
259259
hashf("This is a test", 14, out, ctx0);
260260
bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;
261261

262-
hashdf = func_dbl_selector(jconf::inst()->HaveHardwareAes(), false, jconf::inst()->HaveMulx());
262+
hashdf = func_dbl_selector(jconf::inst()->HaveHardwareAes(), false);
263263
hashdf("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx0, ctx1);
264264
bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
265265
"\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0;
266266

267-
hashdf = func_dbl_selector(jconf::inst()->HaveHardwareAes(), true, jconf::inst()->HaveMulx());
267+
hashdf = func_dbl_selector(jconf::inst()->HaveHardwareAes(), true);
268268
hashdf("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx0, ctx1);
269269
bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
270270
"\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0;
@@ -338,28 +338,23 @@ void minethd::consume_work()
338338
iConsumeCnt++;
339339
}
340340

341-
minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, bool bMulx)
341+
minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch)
342342
{
343-
// We have three independent flag bits in the functions
343+
// We have two independent flag bits in the functions
344344
// therefore we will build a binary digit and select the
345-
// function as a three digit binary
346-
// Digit order SOFT_AES, NO_PREFETCH, MULX
347-
348-
static const cn_hash_fun func_table[8] = {
349-
cryptonight_hash<0x80000, MEMORY, false, false, false>,
350-
cryptonight_hash<0x80000, MEMORY, false, false, true>,
351-
cryptonight_hash<0x80000, MEMORY, false, true, false>,
352-
cryptonight_hash<0x80000, MEMORY, false, true, true>,
353-
cryptonight_hash<0x80000, MEMORY, true, false, false>,
354-
cryptonight_hash<0x80000, MEMORY, true, false, true>,
355-
cryptonight_hash<0x80000, MEMORY, true, true, false>,
356-
cryptonight_hash<0x80000, MEMORY, true, true, true>
345+
// function as a two digit binary
346+
// Digit order (most significant first): SOFT_AES, PREFETCH
347+
348+
static const cn_hash_fun func_table[4] = {
349+
cryptonight_hash<0x80000, MEMORY, false, false>,
350+
cryptonight_hash<0x80000, MEMORY, false, true>,
351+
cryptonight_hash<0x80000, MEMORY, true, false>,
352+
cryptonight_hash<0x80000, MEMORY, true, true>
357353
};
358354

359-
std::bitset<3> digit;
360-
digit.set(0, bMulx);
361-
digit.set(1, !bNoPrefetch);
362-
digit.set(2, !bHaveAes);
355+
std::bitset<2> digit;
356+
digit.set(0, !bNoPrefetch);
357+
digit.set(1, !bHaveAes);
363358

364359
return func_table[digit.to_ulong()];
365360
}
@@ -373,7 +368,7 @@ void minethd::work_main()
373368
uint32_t* piNonce;
374369
job_result result;
375370

376-
hash_fun = func_selector(jconf::inst()->HaveHardwareAes(), bNoPrefetch, jconf::inst()->HaveMulx());
371+
hash_fun = func_selector(jconf::inst()->HaveHardwareAes(), bNoPrefetch);
377372
ctx = minethd_alloc_ctx();
378373

379374
piHashVal = (uint64_t*)(result.bResult + 24);
@@ -430,28 +425,23 @@ void minethd::work_main()
430425
cryptonight_free_ctx(ctx);
431426
}
432427

433-
minethd::cn_hash_fun_dbl minethd::func_dbl_selector(bool bHaveAes, bool bNoPrefetch, bool bMulx)
428+
minethd::cn_hash_fun_dbl minethd::func_dbl_selector(bool bHaveAes, bool bNoPrefetch)
434429
{
435-
// We have three independent flag bits in the functions
430+
// We have two independent flag bits in the functions
436431
// therefore we will build a binary digit and select the
437-
// function as a three digit binary
438-
// Digit order SOFT_AES, NO_PREFETCH, MULX
439-
440-
static const cn_hash_fun_dbl func_table[8] = {
441-
cryptonight_double_hash<0x80000, MEMORY, false, false, false>,
442-
cryptonight_double_hash<0x80000, MEMORY, false, false, true>,
443-
cryptonight_double_hash<0x80000, MEMORY, false, true, false>,
444-
cryptonight_double_hash<0x80000, MEMORY, false, true, true>,
445-
cryptonight_double_hash<0x80000, MEMORY, true, false, false>,
446-
cryptonight_double_hash<0x80000, MEMORY, true, false, true>,
447-
cryptonight_double_hash<0x80000, MEMORY, true, true, false>,
448-
cryptonight_double_hash<0x80000, MEMORY, true, true, true>
432+
// function as a two digit binary
433+
// Digit order (most significant first): SOFT_AES, PREFETCH
434+
435+
static const cn_hash_fun_dbl func_table[4] = {
436+
cryptonight_double_hash<0x80000, MEMORY, false, false>,
437+
cryptonight_double_hash<0x80000, MEMORY, false, true>,
438+
cryptonight_double_hash<0x80000, MEMORY, true, false>,
439+
cryptonight_double_hash<0x80000, MEMORY, true, true>
449440
};
450441

451-
std::bitset<3> digit;
452-
digit.set(0, bMulx);
453-
digit.set(1, !bNoPrefetch);
454-
digit.set(2, !bHaveAes);
442+
std::bitset<2> digit;
443+
digit.set(0, !bNoPrefetch);
444+
digit.set(1, !bHaveAes);
455445

456446
return func_table[digit.to_ulong()];
457447
}
@@ -469,7 +459,7 @@ void minethd::double_work_main()
469459
uint32_t iNonce;
470460
job_result res;
471461

472-
hash_fun = func_dbl_selector(jconf::inst()->HaveHardwareAes(), bNoPrefetch, jconf::inst()->HaveMulx());
462+
hash_fun = func_dbl_selector(jconf::inst()->HaveHardwareAes(), bNoPrefetch);
473463
ctx0 = minethd_alloc_ctx();
474464
ctx1 = minethd_alloc_ctx();
475465

minethd.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,8 @@ class minethd
114114
inline uint32_t calc_nicehash_nonce(uint32_t start, uint32_t resume)
115115
{ return start | (resume * iThreadCount + iThreadNo) << 18; }
116116

117-
static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch, bool bMulx);
118-
static cn_hash_fun_dbl func_dbl_selector(bool bHaveAes, bool bNoPrefetch, bool bMulx);
117+
static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch);
118+
static cn_hash_fun_dbl func_dbl_selector(bool bHaveAes, bool bNoPrefetch);
119119

120120
void work_main();
121121
void double_work_main();

0 commit comments

Comments
 (0)