From 28f41a6e8a4f562272f86c8de9b582e530a4221f Mon Sep 17 00:00:00 2001 From: psychocrypt <psychocryptHPC@gmail.com> Date: Sun, 16 Sep 2018 20:38:10 +0200 Subject: [PATCH] AMD: add unroll option add option `unroll` for OpenCL to allow better tuning the main POW kernel. --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 1 + xmrstak/backend/amd/amd_gpu/gpu.hpp | 1 + xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl | 2 +- xmrstak/backend/amd/autoAdjust.hpp | 2 +- xmrstak/backend/amd/config.tpl | 4 +++- xmrstak/backend/amd/jconf.cpp | 12 ++++++++++-- xmrstak/backend/amd/jconf.hpp | 1 + xmrstak/backend/amd/minethd.cpp | 1 + 8 files changed, 19 insertions(+), 5 deletions(-) diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index bb39c57..767e538 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -405,6 +405,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ options += " -DCOMP_MODE=" + std::to_string(ctx->compMode ? 1u : 0u); options += " -DMEMORY=" + std::to_string(hashMemSize); options += " -DALGO=" + std::to_string(miner_algo[ii]); + options += " -DCN_UNROLL=" + std::to_string(ctx->unroll); /* create a hash for the compile time cache * used data: diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp index 5ab80b8..63c5029 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.hpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp @@ -27,6 +27,7 @@ struct GpuContext size_t workSize; int stridedIndex; int memChunk; + int unroll = 0; bool isNVIDIA = false; int compMode; diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index 9f474da..7d0ad18 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -671,7 +671,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states { ulong idx0 = a[0] & MASK; - #pragma unroll 8 + #pragma unroll CN_UNROLL for(int i = 0; i < ITERATIONS; ++i) { ulong c[2]; diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index 4a2ffdb..c5b331c 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -172,7 +172,7 @@ private: conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" + " \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" + " \"affine_to_cpu\" : false, \"strided_index\" : " + std::to_string(ctx.stridedIndex) + ", \"mem_chunk\" : 2,\n" - " \"comp_mode\" : true\n" + + " \"unroll\" : 8, \"comp_mode\" : true\n" + " },\n"; } else diff --git a/xmrstak/backend/amd/config.tpl b/xmrstak/backend/amd/config.tpl index 28855f0..0101b7e 100644 --- a/xmrstak/backend/amd/config.tpl +++ b/xmrstak/backend/amd/config.tpl @@ -13,13 +13,15 @@ R"===( * mem_chunk - range 0 to 18: set the number of elements (16byte) per chunk * this value is only used if 'strided_index' == 2 * element count is computed with the equation: 2 to the power of 'mem_chunk' e.g. 4 means a chunk of 16 elements(256byte) + * unroll - allow to control how often the POW main loop is unrolled; valid range [0;128] * comp_mode - Compatibility enable/disable the automatic guard around compute kernel which allows * to use a intensity which is not the multiple of the worksize. * If you set false and the intensity is not multiple of the worksize the miner can crash: * in this case set the intensity to a multiple of the worksize or activate comp_mode. * "gpu_threads_conf" : * [ - * { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false, "strided_index" : true, "mem_chunk" : 2, "comp_mode" : true }, + * { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false, + * "strided_index" : true, "mem_chunk" : 2, "unroll" : 8, "comp_mode" : true }, * ], * If you do not wish to mine with your AMD GPU(s) then use: * "gpu_threads_conf" : diff --git a/xmrstak/backend/amd/jconf.cpp b/xmrstak/backend/amd/jconf.cpp index 9e15c93..cd24869 100644 --- a/xmrstak/backend/amd/jconf.cpp +++ b/xmrstak/backend/amd/jconf.cpp @@ -106,17 +106,18 @@ bool jconf::GetThreadConfig(size_t id, thd_cfg &cfg) if(!oThdConf.IsObject()) return false; - const Value *idx, *intensity, *w_size, *aff, *stridedIndex, *memChunk, *compMode; + const Value *idx, *intensity, *w_size, *aff, *stridedIndex, *memChunk, *unroll, *compMode; idx = GetObjectMember(oThdConf, "index"); intensity = GetObjectMember(oThdConf, "intensity"); w_size = GetObjectMember(oThdConf, "worksize"); aff = GetObjectMember(oThdConf, "affine_to_cpu"); stridedIndex = GetObjectMember(oThdConf, "strided_index"); memChunk = GetObjectMember(oThdConf, "mem_chunk"); + unroll = GetObjectMember(oThdConf, "unroll"); compMode = GetObjectMember(oThdConf, "comp_mode"); if(idx == nullptr || intensity == nullptr || w_size == nullptr || aff == nullptr || memChunk == nullptr || - stridedIndex == nullptr || compMode == nullptr) + stridedIndex == nullptr || unroll == nullptr || compMode == nullptr) return false; if(!idx->IsUint64() || !intensity->IsUint64() || !w_size->IsUint64()) @@ -149,6 +150,13 @@ bool jconf::GetThreadConfig(size_t id, thd_cfg &cfg) } cfg.memChunk = (int)memChunk->GetInt64(); + + if(!unroll->IsUint64() || (int)unroll->GetInt64() >= 128 ) + { + printer::inst()->print_msg(L0, "ERROR: unroll must be smaller than 128"); + return false; + } + cfg.unroll = (int)unroll->GetInt64(); if(!compMode->IsBool()) return false; diff --git a/xmrstak/backend/amd/jconf.hpp b/xmrstak/backend/amd/jconf.hpp index 580b69f..b852c59 100644 --- a/xmrstak/backend/amd/jconf.hpp +++ b/xmrstak/backend/amd/jconf.hpp @@ -28,6 +28,7 @@ public: long long cpu_aff; int stridedIndex; int memChunk; + int unroll; bool compMode; }; diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index d6051ff..5ac2463 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -99,6 +99,7 @@ bool minethd::init_gpus() vGpuData[i].stridedIndex = cfg.stridedIndex; vGpuData[i].memChunk = cfg.memChunk; vGpuData[i].compMode = cfg.compMode; + vGpuData[i].unroll = cfg.unroll; } return InitOpenCL(vGpuData.data(), n, jconf::inst()->GetPlatformIdx()) == ERR_SUCCESS; -- GitLab