From ed2168b48d16a9870cbef067d38a5ad16b26c9f9 Mon Sep 17 00:00:00 2001 From: psychocrypt <psychocryptHPC@gmail.com> Date: Wed, 10 Oct 2018 11:52:40 +0200 Subject: [PATCH] CUDA: fix invalid results If `comp_mode` is false, the results on a Windows platform will be invalid. The reason is that `ulong4` is 16 bytes on Windows and 32 bytes on Linux (`unsigned long` is 32-bit on Windows but 64-bit on Linux); `ulonglong4` is 32 bytes on both platforms. Thanks to @xmrig for finding and solving the issue. Fixes #1873 --- xmrstak/backend/nvidia/autoAdjust.hpp | 2 +- xmrstak/backend/nvidia/config.tpl | 2 +- xmrstak/backend/nvidia/nvcc_code/cuda_core.cu | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/xmrstak/backend/nvidia/autoAdjust.hpp b/xmrstak/backend/nvidia/autoAdjust.hpp index 6354f60..27783ac 100644 --- a/xmrstak/backend/nvidia/autoAdjust.hpp +++ b/xmrstak/backend/nvidia/autoAdjust.hpp @@ -96,7 +96,7 @@ private: " \"threads\" : " + std::to_string(ctx.device_threads) + ", \"blocks\" : " + std::to_string(ctx.device_blocks) + ",\n" + " \"bfactor\" : " + std::to_string(ctx.device_bfactor) + ", \"bsleep\" : " + std::to_string(ctx.device_bsleep) + ",\n" + " \"affine_to_cpu\" : false, \"sync_mode\" : 3,\n" + - " \"comp_mode\" : true,\n" + + " \"comp_mode\" : false,\n" + " },\n"; } } diff --git a/xmrstak/backend/nvidia/config.tpl b/xmrstak/backend/nvidia/config.tpl index e2a76d9..8803f6f 100644 --- a/xmrstak/backend/nvidia/config.tpl +++ b/xmrstak/backend/nvidia/config.tpl @@ -17,7 +17,7 @@ R"===(// generated by XMRSTAK_VERSION * 2 = cudaDeviceScheduleYield * 3 = cudaDeviceScheduleBlockingSync (default) * comp_mode - Compatibility if true it will use 64bit memory loads and if false it will use - * 128bit memory loads (can produce invalid results) + * 256bit memory loads (can produce invalid results) * (this option has only a meaning for cryptonight_v8 and monero) * * On the first run the miner will look at your system and suggest a basic configuration that will work, diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu 
b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu index 1c9c9df..3dce3e4 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu @@ -343,7 +343,7 @@ __global__ void cryptonight_core_gpu_phase2_double( int threads, int bfactor, in } } else - ((ulong4*)myChunks)[sub] = ((ulong4*)ptr0)[sub]; + ((ulonglong4*)myChunks)[sub] = ((ulonglong4*)ptr0)[sub]; uint32_t idx1 = (idx0 & 0x30) >> 3; @@ -381,7 +381,7 @@ __global__ void cryptonight_core_gpu_phase2_double( int threads, int bfactor, in } } else - ((ulong4*)ptr0)[sub] = ((ulong4*)myChunks)[sub]; + ((ulonglong4*)ptr0)[sub] = ((ulonglong4*)myChunks)[sub]; idx0 = shuffle<2>(sPtr, sub, cx_aes.x, 0); idx1 = (idx0 & 0x30) >> 3; @@ -396,7 +396,7 @@ __global__ void cryptonight_core_gpu_phase2_double( int threads, int bfactor, in } } else - ((ulong4*)myChunks)[sub] = ((ulong4*)ptr0)[sub]; + ((ulonglong4*)myChunks)[sub] = ((ulonglong4*)ptr0)[sub]; if(ALGO != cryptonight_monero_v8) bx0 = cx_aes; @@ -461,7 +461,7 @@ __global__ void cryptonight_core_gpu_phase2_double( int threads, int bfactor, in } } else - ((ulong4*)ptr0)[sub] = ((ulong4*)myChunks)[sub]; + ((ulonglong4*)ptr0)[sub] = ((ulonglong4*)myChunks)[sub]; ax0 ^= c; idx0 = shuffle<2>(sPtr, sub, static_cast<uint32_t>(ax0), 0); } -- GitLab