diff --git a/CMakeLists.txt b/CMakeLists.txt index 067bbd0a2f7fd85fff613cd5767da2ec471624e7..cf439227fe394bb46421614b2c98a01506b073b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -459,6 +459,7 @@ endif() target_link_libraries(xmr-stak-c ${LIBS}) enable_language(ASM) +set_property(SOURCE "xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop.S" PROPERTY LANGUAGE C) # asm optimized monero v8 code add_library(xmr-stak-asm STATIC diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop_ivybridge.inc b/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop_ivybridge.inc index ea7f799fd7b7f0769fbc1d6d6dbc06af1c63628d..1cc20b35aed070f5d2fe04aa3251304a02adff9a 100644 --- a/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop_ivybridge.inc +++ b/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop_ivybridge.inc @@ -157,14 +157,14 @@ $sqrt_fixup_ivybridge_ret: $sqrt_fixup_ivybridge: dec rdx - mov r13, -4389456576512 + movq r13, -4389456576512 mov rax, rdx shr rdx, 19 shr rax, 20 mov rcx, rdx sub rcx, rax add rax, r13 - mov r13, 4389456576511 + movq r13, 4389456576511 sub rcx, r13 mov r13d, -2147483647 imul rcx, rax diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop_ryzen.inc b/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop_ryzen.inc index 5797f5497ad21972d49b0b9f714b4595bcbb570e..c564d8949f783eafc3b55874cb4044140febe257 100644 --- a/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop_ryzen.inc +++ b/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop_ryzen.inc @@ -157,14 +157,14 @@ $sqrt_fixup_ryzen_ret: $sqrt_fixup_ryzen: movq r9, xmm2 dec rdi - mov rdx, 4389456576511 + movq rdx, 4389456576511 mov rax, rdi shr rdi, 19 shr rax, 20 mov rcx, rdi sub rcx, rax sub rcx, rdx - mov rdx, -4389456576512 + movq rdx, -4389456576512 add rax, rdx imul rcx, rax sub rcx, r9 diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 7c409d187727f77e5541e128db27a8439c0e4aab..0838cfac4567fda10035b7ae1b75fc5e48a1ebfd 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -435,7 +435,9 @@ inline uint64_t int_sqrt33_1_double_precision(const uint64_t n0) uint64_t x2 = (s - (1022ULL << 32)) * (r - s - (1022ULL << 32) + 1); -#if defined _MSC_VER || (__GNUC__ >= 7) +#ifdef __INTEL_COMPILER + _addcarry_u64(_subborrow_u64(0, x2, n0, (unsigned __int64*)&x2), r, 0, (unsigned __int64*)&r); +#elif defined(_MSC_VER) || (__GNUC__ >= 7) _addcarry_u64(_subborrow_u64(0, x2, n0, (unsigned long long int*)&x2), r, 0, (unsigned long long int*)&r); #else // GCC versions prior to 7 don't generate correct assembly for _subborrow_u64 -> _addcarry_u64 sequence