From 1692c543c6be416f5b6b14e1501c880e62ee5fe6 Mon Sep 17 00:00:00 2001 From: psychocrypt <psychocryptHPC@gmail.com> Date: Wed, 19 Sep 2018 18:05:47 +0200 Subject: [PATCH] asm, style and spelling fixes - fix code style issues - fix spelling issue - fix asm to support newer clang versions --- xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl | 4 ++-- xmrstak/backend/amd/config.tpl | 4 ++-- xmrstak/backend/amd/jconf.cpp | 4 ++-- xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S | 4 ++-- xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm | 4 ++-- .../crypto/asm/cryptonight_v8_main_loop_ivybridge_linux.inc | 5 +++-- .../cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_linux.inc | 6 +++--- xmrstak/backend/cpu/crypto/cryptonight_aesni.h | 6 +++--- xmrstak/backend/cpu/minethd.cpp | 2 +- 9 files changed, 20 insertions(+), 19 deletions(-) diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index 7d0ad18..286bc39 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -718,7 +718,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states #elif(ALGO==11) SCRATCHPAD_CHUNK(0) = b_x[0] ^ ((uint4 *)c)[0]; # ifdef __NV_CL_C_VERSION - // flush shuffeled data + // flush shuffled data SCRATCHPAD_CHUNK_GLOBAL = *scratchpad_line; idx0 = c[0] & MASK; idxS = idx0 & 0x30; @@ -786,7 +786,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states // cryptonight_monero_v8 #if (ALGO == 11) # if defined(__NV_CL_C_VERSION) - // flush shuffeled data + // flush shuffled data SCRATCHPAD_CHUNK_GLOBAL = *scratchpad_line; # endif b_x[1] = b_x[0]; diff --git a/xmrstak/backend/amd/config.tpl b/xmrstak/backend/amd/config.tpl index 63106bc..043b053 100644 --- a/xmrstak/backend/amd/config.tpl +++ b/xmrstak/backend/amd/config.tpl @@ -9,12 +9,12 @@ R"===( * 2 = chunked memory, chunk size is controlled by 
'mem_chunk' * required: intensity must be a multiple of worksize * 1 or true = use 16byte contiguous memory per thread, the next memory block has offset of intensity blocks - * (not allowed for cryptonight_v8 ans monero8) + * (not allowed for cryptonight_v8 and monero8) * 0 or false = use a contiguous block of memory per thread * mem_chunk - range 0 to 18: set the number of elements (16byte) per chunk * this value is only used if 'strided_index' == 2 * element count is computed with the equation: 2 to the power of 'mem_chunk' e.g. 4 means a chunk of 16 elements(256byte) - * unroll - allow to control how often the POW main loop is unrolled; valid range [0;128] + * unroll - allow to control how often the POW main loop is unrolled; valid range [0;128) - for most OpenCL implementations it must be a power of two. * comp_mode - Compatibility enable/disable the automatic guard around compute kernel which allows * to use a intensity which is not the multiple of the worksize. * If you set false and the intensity is not multiple of the worksize the miner can crash: diff --git a/xmrstak/backend/amd/jconf.cpp b/xmrstak/backend/amd/jconf.cpp index cd24869..777dbdb 100644 --- a/xmrstak/backend/amd/jconf.cpp +++ b/xmrstak/backend/amd/jconf.cpp @@ -151,9 +151,9 @@ bool jconf::GetThreadConfig(size_t id, thd_cfg &cfg) cfg.memChunk = (int)memChunk->GetInt64(); - if(!unroll->IsUint64() || (int)unroll->GetInt64() >= 128 ) + if(!unroll->IsUint64() || (int)unroll->GetInt64() >= 128 || (unroll->GetInt64() & (unroll->GetInt64() - 1)) != 0) { - printer::inst()->print_msg(L0, "ERROR: unroll must be smaller than 128"); + printer::inst()->print_msg(L0, "ERROR: unroll must be smaller than 128 and a power of two"); return false; } cfg.unroll = (int)unroll->GetInt64(); diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S index 3aa8994..b6be943 100644 --- a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S +++
b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S @@ -14,7 +14,7 @@ ALIGN 8 FN_PREFIX(cryptonight_v8_mainloop_ivybridge_asm): sub rsp, 48 mov rcx, rdi - #include "cryptonight_v8_main_loop_ivybridge_linux.inc" + #include "cryptonight_v8_main_loop_ivybridge_linux.inc" add rsp, 48 ret 0 @@ -22,6 +22,6 @@ ALIGN 8 FN_PREFIX(cryptonight_v8_mainloop_ryzen_asm): sub rsp, 48 mov rcx, rdi - #include "cryptonight_v8_main_loop_ryzen_linux.inc" + #include "cryptonight_v8_main_loop_ryzen_linux.inc" add rsp, 48 ret 0 diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm index 3c2bba6..a1615e9 100644 --- a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm +++ b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm @@ -4,13 +4,13 @@ PUBLIC cryptonight_v8_mainloop_ryzen_asm ALIGN 8 cryptonight_v8_mainloop_ivybridge_asm PROC - INCLUDE cryptonight_v8_main_loop_ivybridge_win64.inc + INCLUDE cryptonight_v8_main_loop_ivybridge_win64.inc ret 0 cryptonight_v8_mainloop_ivybridge_asm ENDP ALIGN 8 cryptonight_v8_mainloop_ryzen_asm PROC - INCLUDE cryptonight_v8_main_loop_ryzen_win64.inc + INCLUDE cryptonight_v8_main_loop_ryzen_win64.inc ret 0 cryptonight_v8_mainloop_ryzen_asm ENDP diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ivybridge_linux.inc b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ivybridge_linux.inc index 23f6cc0..21f1f48 100644 --- a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ivybridge_linux.inc +++ b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ivybridge_linux.inc @@ -157,14 +157,15 @@ sqrt_fixup_ivybridge_ret: sqrt_fixup_ivybridge: dec rdx - movq r13, -4389456576512 + mov r13d, -1022 + shl r13, 32 mov rax, rdx shr rdx, 19 shr rax, 20 mov rcx, rdx sub rcx, rax add rax, r13 - movq r13, 4389456576511 + not r13 sub rcx, r13 mov r13d, -2147483647 imul rcx, rax diff --git 
a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_linux.inc b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_linux.inc index 551ee85..9c177b8 100644 --- a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_linux.inc +++ b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_linux.inc @@ -157,14 +157,14 @@ sqrt_fixup_ryzen_ret: sqrt_fixup_ryzen: movq r9, xmm2 dec rdi - movq rdx, 4389456576511 + mov edx, -1022 + shl rdx, 32 mov rax, rdi shr rdi, 19 shr rax, 20 mov rcx, rdi sub rcx, rax - sub rcx, rdx - movq rdx, -4389456576512 + lea rcx, [rcx+rdx+1] add rax, rdx imul rcx, rax sub rcx, r9 diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 844e4c0..6edae90 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -441,8 +441,8 @@ inline uint64_t int_sqrt33_1_double_precision(const uint64_t n0) _addcarry_u64(_subborrow_u64(0, x2, n0, (unsigned long long int*)&x2), r, 0, (unsigned long long int*)&r); #else // GCC versions prior to 7 don't generate correct assembly for _subborrow_u64 -> _addcarry_u64 sequence - // Fallback to simpler code - if (x2 < n0) ++r; + // Fallback to simpler code + if (x2 < n0) ++r; #endif return r; } @@ -733,7 +733,7 @@ inline void set_float_rounding_mode() /** add append n to all arguments and keeps n as first argument * * @param n number which is appended to the arguments (expect the first argument n) - * + * * @code{.cpp} * CN_ENUM_2(1, foo, bar) * // is transformed to diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 2f01d5e..05743ae 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -453,7 +453,7 @@ template<size_t N> minethd::cn_hash_fun minethd::func_multi_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo, const std::string& asm_version_str) { static_assert(N >= 1, "number of threads must be 
>= 1" ); - + // check for asm optimized version for cryptonight_v8 if(N == 1 && algo == cryptonight_monero_v8 && bHaveAes) { -- GitLab