diff --git a/xmrstak/backend/nvidia/autoAdjust.hpp b/xmrstak/backend/nvidia/autoAdjust.hpp index 27783acd10aee5951293a7179d692c0155a02527..2755e03d2f66fbc9668f1b1e2a0d0f3d165d6e91 100644 --- a/xmrstak/backend/nvidia/autoAdjust.hpp +++ b/xmrstak/backend/nvidia/autoAdjust.hpp @@ -96,7 +96,7 @@ private: " \"threads\" : " + std::to_string(ctx.device_threads) + ", \"blocks\" : " + std::to_string(ctx.device_blocks) + ",\n" + " \"bfactor\" : " + std::to_string(ctx.device_bfactor) + ", \"bsleep\" : " + std::to_string(ctx.device_bsleep) + ",\n" + " \"affine_to_cpu\" : false, \"sync_mode\" : 3,\n" + - " \"comp_mode\" : false,\n" + + " \"mem_mode\" : 1,\n" + " },\n"; } } diff --git a/xmrstak/backend/nvidia/config.tpl b/xmrstak/backend/nvidia/config.tpl index 8803f6ff20e7af2129419f10534185a567d585de..8a5982b57cb05316e222390ec311ce84a93fcbe4 100644 --- a/xmrstak/backend/nvidia/config.tpl +++ b/xmrstak/backend/nvidia/config.tpl @@ -16,9 +16,9 @@ R"===(// generated by XMRSTAK_VERSION * 1 = cudaDeviceScheduleSpin - create a high load on one cpu thread per gpu * 2 = cudaDeviceScheduleYield * 3 = cudaDeviceScheduleBlockingSync (default) - * comp_mode - Compatibility if true it will use 64bit memory loads and if false it will use - * 256bit memory loads (can produce invalid results) - * (this option has only a meaning for cryptonight_v8 and monero) + * mem_mode - select the memory access pattern (this option has only a meaning for cryptonight_v8 and monero) + * 0 = 64bit memory loads + * 1 = 256bit memory loads * * On the first run the miner will look at your system and suggest a basic configuration that will work, * you can try to tweak it from there to get the best performance. @@ -27,7 +27,7 @@ R"===(// generated by XMRSTAK_VERSION * "gpu_threads_conf" : * [ * { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" : 0, - * "affine_to_cpu" : false, "sync_mode" : 3, + * "affine_to_cpu" : false, "sync_mode" : 3, "mem_mode" : 1 * }, * ], * If you do not wish to mine with your nVidia GPU(s) then use: diff --git a/xmrstak/backend/nvidia/jconf.cpp b/xmrstak/backend/nvidia/jconf.cpp index b1059f359313b6e165251f6e902aa99bc51cf939..6c443343b91310d9698d9466f241b16018c884e4 100644 --- a/xmrstak/backend/nvidia/jconf.cpp +++ b/xmrstak/backend/nvidia/jconf.cpp @@ -123,7 +123,7 @@ bool jconf::GetGPUThreadConfig(size_t id, thd_cfg &cfg) if(!oThdConf.IsObject()) return false; - const Value *gid, *blocks, *threads, *bfactor, *bsleep, *aff, *syncMode, *compMode; + const Value *gid, *blocks, *threads, *bfactor, *bsleep, *aff, *syncMode, *memMode; gid = GetObjectMember(oThdConf, "index"); blocks = GetObjectMember(oThdConf, "blocks"); threads = GetObjectMember(oThdConf, "threads"); @@ -131,11 +131,11 @@ bool jconf::GetGPUThreadConfig(size_t id, thd_cfg &cfg) bsleep = GetObjectMember(oThdConf, "bsleep"); aff = GetObjectMember(oThdConf, "affine_to_cpu"); syncMode = GetObjectMember(oThdConf, "sync_mode"); - compMode = GetObjectMember(oThdConf, "comp_mode"); + memMode = GetObjectMember(oThdConf, "mem_mode"); if(gid == nullptr || blocks == nullptr || threads == nullptr || bfactor == nullptr || bsleep == nullptr || aff == nullptr || syncMode == nullptr || - compMode == nullptr) + memMode == nullptr) { return false; } @@ -160,12 +160,15 @@ bool jconf::GetGPUThreadConfig(size_t id, thd_cfg &cfg) if(!syncMode->IsNumber() || syncMode->GetInt() < 0 || syncMode->GetInt() > 3) { - printer::inst()->print_msg(L0, "Error NVIDIA: sync_mode out of range or no number. ( range: 0 <= sync_mode < 4.)"); + printer::inst()->print_msg(L0, "Error NVIDIA: sync_mode out of range or not a number. ( range: 0 <= sync_mode < 4.)"); return false; } - if(!compMode->IsBool()) + if(!memMode->IsNumber() || memMode->GetInt() < 0 || memMode->GetInt() > 1) + { + printer::inst()->print_msg(L0, "Error NVIDIA: mem_mode out of range or not a number. (range: 0 or 1)"); return false; + } cfg.id = gid->GetInt(); @@ -174,7 +177,7 @@ bool jconf::GetGPUThreadConfig(size_t id, thd_cfg &cfg) cfg.bfactor = bfactor->GetInt(); cfg.bsleep = bsleep->GetInt(); cfg.syncMode = syncMode->GetInt(); - cfg.compMode = compMode->GetBool(); + cfg.memMode = memMode->GetInt(); if(aff->IsNumber()) cfg.cpu_aff = aff->GetInt(); diff --git a/xmrstak/backend/nvidia/jconf.hpp b/xmrstak/backend/nvidia/jconf.hpp index 5ee1f81339fd2c9d35932c05022a427711f1f477..40b72f880898485058c2775b11afdc9c0dfaff1d 100644 --- a/xmrstak/backend/nvidia/jconf.hpp +++ b/xmrstak/backend/nvidia/jconf.hpp @@ -29,7 +29,7 @@ public: bool bNoPrefetch; int32_t cpu_aff; int syncMode; - bool compMode; + int memMode; long long iCpuAff; }; diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp index 135f26ea9768a4bb8cb84546e06b583c2c5a69fc..e82ec91c3d783c3bbc3cc800aada753b45f0b566 100644 --- a/xmrstak/backend/nvidia/minethd.cpp +++ b/xmrstak/backend/nvidia/minethd.cpp @@ -78,7 +78,7 @@ minethd::minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg) ctx.device_bfactor = (int)cfg.bfactor; ctx.device_bsleep = (int)cfg.bsleep; ctx.syncMode = cfg.syncMode; - ctx.compMode = cfg.compMode; + ctx.memMode = cfg.memMode; this->affinity = cfg.cpu_aff; std::future<void> numa_guard = numa_promise.get_future(); diff --git a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp index 8167395e3ec3aa6c5247178e0d337575ae587636..8fda8d401323a185479bc5f2756cac3c5ca71af9 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp +++ b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp @@ -16,7 +16,7 @@ typedef struct { int device_bfactor; int device_bsleep; int syncMode; - bool compMode; + bool memMode; uint32_t *d_input; uint32_t inputlen; diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu index 3dce3e4ac08f51648ec1002ee179026afa746eb8..00a65332ad5544a550b5c99e8e77dc473ee5199d 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu @@ -257,10 +257,10 @@ struct u64 : public uint2 /** cryptonight with two threads per hash * - * @tparam COMP_MODE if true than 64bit memory transfers per thread will be used to store/load data within shared memory - * else 128bit operations will be used + * @tparam MEM_MODE if `0` than 64bit memory transfers per thread will be used to store/load data within shared memory + * else if `1` 256bit operations will be used */ -template<size_t ITERATIONS, uint32_t MEMORY, uint32_t MASK, xmrstak_algo ALGO, bool COMP_MODE> +template<size_t ITERATIONS, uint32_t MEMORY, uint32_t MASK, xmrstak_algo ALGO, uint32_t MEM_MODE> #ifdef XMR_STAK_THREADS __launch_bounds__( XMR_STAK_THREADS * 2 ) #endif @@ -334,7 +334,7 @@ __global__ void cryptonight_core_gpu_phase2_double( int threads, int bfactor, in { ptr0 = (uint64_t *)&l0[idx0 & MASK & 0x1FFFC0]; - if(COMP_MODE) + if(MEM_MODE == 0) { #pragma unroll 4 for(int x = 0; x < 8; x += 2) @@ -372,7 +372,7 @@ __global__ void cryptonight_core_gpu_phase2_double( int threads, int bfactor, in } myChunks[ idx1 + sub ] = cx_aes ^ bx0; - if(COMP_MODE) + if(MEM_MODE == 0) { #pragma unroll 4 for(int x = 0; x < 8; x += 2) @@ -387,7 +387,7 @@ __global__ void cryptonight_core_gpu_phase2_double( int threads, int bfactor, in idx1 = (idx0 & 0x30) >> 3; ptr0 = (uint64_t *)&l0[idx0 & MASK & 0x1FFFC0]; - if(COMP_MODE) + if(MEM_MODE == 0) { #pragma unroll 4 for(int x = 0; x < 8; x += 2) @@ -452,7 +452,7 @@ __global__ void cryptonight_core_gpu_phase2_double( int threads, int bfactor, in bx0 = cx_aes; } myChunks[ idx1 + sub ] = ax0; - if(COMP_MODE) + if(MEM_MODE == 0) { #pragma unroll 4 for(int x = 0; x < 8; x += 2) @@ -740,7 +740,7 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti MEMCPY8( d_ctx_state + thread * 50 + sub + 16, text, 2 ); } -template<size_t ITERATIONS, uint32_t MASK, uint32_t MEMORY, xmrstak_algo ALGO, bool COMP_MODE> +template<size_t ITERATIONS, uint32_t MASK, uint32_t MEMORY, xmrstak_algo ALGO, uint32_t MEM_MODE> void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce) { dim3 grid( ctx->device_blocks ); @@ -782,7 +782,7 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce) CUDA_CHECK_MSG_KERNEL( ctx->device_id, "\n**suggestion: Try to increase the value of the attribute 'bfactor' or \nreduce 'threads' in the NVIDIA config file.**", - cryptonight_core_gpu_phase2_double<ITERATIONS,MEMORY,MASK,ALGO, COMP_MODE><<< + cryptonight_core_gpu_phase2_double<ITERATIONS,MEMORY,MASK,ALGO, MEM_MODE><<< grid, block2, sizeof(uint64_t) * block2.x * 8 + @@ -855,42 +855,42 @@ void cryptonight_core_cpu_hash(nvid_ctx* ctx, xmrstak_algo miner_algo, uint32_t if(miner_algo == invalid_algo) return; static const cuda_hash_fn func_table[] = { - cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight, false>, - cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight, true>, + cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight, 0>, + cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_lite, false>, - cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_lite, true>, + cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_lite, 0>, + cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_lite, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_monero, false>, - cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_monero, true>, + cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_monero, 0>, + cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_monero, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_heavy, false>, - cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_heavy, true>, + cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_heavy, 0>, + cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_heavy, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_aeon, false>, - cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_aeon, true>, + cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_aeon, 0>, + cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_aeon, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_ipbc, false>, - cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_ipbc, true>, + cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_ipbc, 0>, + cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_ipbc, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_stellite, false>, - cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_stellite, true>, + cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_stellite, 0>, + cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_stellite, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_MASARI_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_masari, false>, - cryptonight_core_gpu_hash<CRYPTONIGHT_MASARI_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_masari, true>, + cryptonight_core_gpu_hash<CRYPTONIGHT_MASARI_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_masari, 0>, + cryptonight_core_gpu_hash<CRYPTONIGHT_MASARI_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_masari, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_haven, false>, - cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_haven, true>, + cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_haven, 0>, + cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_haven, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_bittube2, false>, - cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_bittube2, true>, + cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_bittube2, 0>, + cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_bittube2, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_monero_v8, false>, - cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_monero_v8, true> + cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_monero_v8, 0>, + cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_monero_v8, 1> }; std::bitset<1> digit; - digit.set(0, ctx->compMode); + digit.set(0, ctx->memMode == 1); cuda_hash_fn selected_function = func_table[ ((miner_algo - 1u) << 1) | digit.to_ulong() ]; selected_function(ctx, startNonce);