Skip to content
Snippets Groups Projects
Commit 7008cbe1 authored by psychocrypt's avatar psychocrypt
Browse files

OpenCL: use user-defined unroll in cn_gpu

- use the user-defined unroll
- auto suggestion:
  - only tune for cn_gpu if it is the main user currency (after a fork)
  - set unroll to 1 for cn_gpu
parent b73401b7
No related branches found
No related tags found
No related merge requests found
...@@ -225,6 +225,7 @@ __kernel void JOIN(cn1_cn_gpu,ALGO)(__global int *lpad_in, __global int *spad, u ...@@ -225,6 +225,7 @@ __kernel void JOIN(cn1_cn_gpu,ALGO)(__global int *lpad_in, __global int *spad, u
uint s = ((__global uint*)spad)[idxHash * 50] >> 8; uint s = ((__global uint*)spad)[idxHash * 50] >> 8;
float4 vs = (float4)(0); float4 vs = (float4)(0);
#pragma unroll CN_UNROLL
for(size_t i = 0; i < ITERATIONS; i++) for(size_t i = 0; i < ITERATIONS; i++)
{ {
mem_fence(CLK_LOCAL_MEM_FENCE); mem_fence(CLK_LOCAL_MEM_FENCE);
......
...@@ -137,8 +137,8 @@ private: ...@@ -137,8 +137,8 @@ private:
// true for all cryptonight_heavy derivates since we check the user and dev pool // true for all cryptonight_heavy derivates since we check the user and dev pool
bool useCryptonight_heavy = std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_heavy) != neededAlgorithms.end(); bool useCryptonight_heavy = std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_heavy) != neededAlgorithms.end();
// true for all cryptonight_gpu derivates since we check the user and dev pool // true for cryptonight_gpu as main user pool algorithm
bool useCryptonight_gpu = std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_gpu) != neededAlgorithms.end(); bool useCryptonight_gpu = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_gpu;
// set strided index to default // set strided index to default
ctx.stridedIndex = 1; ctx.stridedIndex = 1;
...@@ -161,12 +161,15 @@ private: ...@@ -161,12 +161,15 @@ private:
if (hashMemSize <= CRYPTONIGHT_TURTLE_MEMORY) if (hashMemSize <= CRYPTONIGHT_TURTLE_MEMORY)
maxThreads *= 4u; maxThreads *= 4u;
uint32_t numUnroll = 8;
if(useCryptonight_gpu) if(useCryptonight_gpu)
{ {
// 6 waves per compute unit are a good value (based on profiling) // 6 waves per compute unit are a good value (based on profiling)
// @todo check again after all optimizations // @todo check again after all optimizations
maxThreads = ctx.computeUnits * 6 * 8; maxThreads = ctx.computeUnits * 6 * 8;
ctx.stridedIndex = 0; ctx.stridedIndex = 0;
numUnroll = 1;
} }
// keep 128MiB memory free (value is randomly chosen) from the max available memory // keep 128MiB memory free (value is randomly chosen) from the max available memory
...@@ -210,7 +213,7 @@ private: ...@@ -210,7 +213,7 @@ private:
conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" + conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" +
" \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" + " \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" +
" \"affine_to_cpu\" : false, \"strided_index\" : " + std::to_string(ctx.stridedIndex) + ", \"mem_chunk\" : 2,\n" " \"affine_to_cpu\" : false, \"strided_index\" : " + std::to_string(ctx.stridedIndex) + ", \"mem_chunk\" : 2,\n"
" \"unroll\" : 8, \"comp_mode\" : true, \"interleave\" : " + std::to_string(ctx.interleave) + "\n" + " \"unroll\" : " + std::to_string(numUnroll) + ", \"comp_mode\" : true, \"interleave\" : " + std::to_string(ctx.interleave) + "\n" +
" },\n"; " },\n";
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment