From 77160cf13a2beaf23c6fa2fad5180080b66583a0 Mon Sep 17 00:00:00 2001 From: psychocrypt <psychocryptHPC@gmail.com> Date: Wed, 19 Sep 2018 11:54:45 +0200 Subject: [PATCH] fix nicehash `invalid results` If the first bit of the nonce is `1` (this is very often if we use a nicehash pool) than it could be that some OpenCL implementations handle the 64bit representation of the 32bit nonce on the device side as signed integer. During a right bitshift we pull wrong ones from the wrong higher part of the 64bit nonce representation into the 32bit part of the nonce. The result will be that the computed share is invalid. - explicit cast the nonce on the device to `uint` to avoid any side effects --- .../backend/amd/amd_gpu/opencl/cryptonight.cl | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index 002472d..78cd30c 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -482,9 +482,14 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad, State[10] = input[10]; ((uint *)State)[9] &= 0x00FFFFFFU; - ((uint *)State)[9] |= ((get_global_id(0)) & 0xFF) << 24; + ((uint *)State)[9] |= (((uint)get_global_id(0)) & 0xFF) << 24; ((uint *)State)[10] &= 0xFF000000U; - ((uint *)State)[10] |= ((get_global_id(0) >> 8)); + /* explicit cast to `uint` is required because some OpenCL implementations (e.g. NVIDIA) + * handle get_global_id and get_global_offset as signed long long int and add + * 0xFFFFFFFF... to `get_global_id` if we set on host side a 32bit offset where the first bit is `1` + * (even if it is correct casted to unsigned on the host) + */ + ((uint *)State)[10] |= (((uint)get_global_id(0) >> 8)); for(int i = 11; i < 25; ++i) State[i] = 0x00UL; @@ -605,7 +610,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states tweak1_2 = as_uint2(input[4]); tweak1_2.s0 >>= 24; tweak1_2.s0 |= tweak1_2.s1 << 8; - tweak1_2.s1 = get_global_id(0); + tweak1_2.s1 = (uint)get_global_id(0); tweak1_2 ^= as_uint2(states[24]); #endif } @@ -918,7 +923,7 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u { ulong outIdx = atomic_inc(output + 0xFF); if(outIdx < 0xFF) - output[outIdx] = BranchBuf[idx] + get_global_offset(0); + output[outIdx] = BranchBuf[idx] + (uint)get_global_offset(0); } } mem_fence(CLK_GLOBAL_MEM_FENCE); @@ -994,7 +999,7 @@ __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint { ulong outIdx = atomic_inc(output + 0xFF); if(outIdx < 0xFF) - output[outIdx] = BranchBuf[idx] + get_global_offset(0); + output[outIdx] = BranchBuf[idx] + (uint)get_global_offset(0); } } } @@ -1072,7 +1077,7 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u { ulong outIdx = atomic_inc(output + 0xFF); if(outIdx < 0xFF) - output[outIdx] = BranchBuf[idx] + get_global_offset(0); + output[outIdx] = BranchBuf[idx] + (uint)get_global_offset(0); } } } @@ -1133,7 +1138,7 @@ __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global { ulong outIdx = atomic_inc(output + 0xFF); if(outIdx < 0xFF) - output[outIdx] = BranchBuf[idx] + get_global_offset(0); + output[outIdx] = BranchBuf[idx] + (uint)get_global_offset(0); } } } -- GitLab