Skip to content
Snippets Groups Projects
Unverified commit 87137162, authored by fireice-uk, committed by GitHub
Browse files

Merge pull request #2072 from fireice-uk/dev

Merge latest changes for next release
parents 28f27d31 447fef4b
No related branches found
No related tags found
No related merge requests found
...@@ -969,7 +969,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar ...@@ -969,7 +969,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
input[input_len] = 0x01; input[input_len] = 0x01;
memset(input + input_len + 1, 0, 88 - input_len - 1); memset(input + input_len + 1, 0, 88 - input_len - 1);
size_t numThreads = ctx->rawIntensity; cl_uint numThreads = ctx->rawIntensity;
if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->InputBuffer, CL_TRUE, 0, 88, input, 0, NULL, NULL)) != CL_SUCCESS) if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->InputBuffer, CL_TRUE, 0, 88, input, 0, NULL, NULL)) != CL_SUCCESS)
{ {
...@@ -998,7 +998,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar ...@@ -998,7 +998,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
} }
// Threads // Threads
if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][0], 3, sizeof(cl_ulong), &numThreads)) != CL_SUCCESS) if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][0], 3, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
{ {
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 3.", err_to_str(ret)); printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 3.", err_to_str(ret));
return(ERR_OCL_API); return(ERR_OCL_API);
...@@ -1021,7 +1021,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar ...@@ -1021,7 +1021,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
} }
// Threads // Threads
if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 2, sizeof(cl_ulong), &numThreads)) != CL_SUCCESS) if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 2, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
{ {
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 2.", err_to_str(ret)); printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 2.", err_to_str(ret));
return(ERR_OCL_API); return(ERR_OCL_API);
...@@ -1081,7 +1081,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar ...@@ -1081,7 +1081,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
} }
// Threads // Threads
if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 6, sizeof(cl_ulong), &numThreads)) != CL_SUCCESS) if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 6, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
{ {
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 6.", err_to_str(ret)); printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 6.", err_to_str(ret));
return(ERR_OCL_API); return(ERR_OCL_API);
...@@ -1160,7 +1160,7 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo) ...@@ -1160,7 +1160,7 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo)
clFinish(ctx->CommandQueues); clFinish(ctx->CommandQueues);
size_t Nonce[2] = {ctx->Nonce, 1}, gthreads[2] = { g_thd, 8 }, lthreads[2] = { w_size, 8 }; size_t Nonce[2] = {ctx->Nonce, 1}, gthreads[2] = { g_thd, 8 }, lthreads[2] = { 8, 8 };
if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[kernel_storage][0], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS) if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[kernel_storage][0], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS)
{ {
printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 0); printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 0);
...@@ -1212,7 +1212,8 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo) ...@@ -1212,7 +1212,8 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo)
if(BranchNonces[i]) if(BranchNonces[i])
{ {
// Threads // Threads
if((clSetKernelArg(ctx->Kernels[kernel_storage][i + 3], 4, sizeof(cl_ulong), BranchNonces + i)) != CL_SUCCESS) cl_uint numThreads = BranchNonces[i];
if((clSetKernelArg(ctx->Kernels[kernel_storage][i + 3], 4, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
{ {
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 4); printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 4);
return(ERR_OCL_API); return(ERR_OCL_API);
......
This diff is collapsed.
...@@ -2,49 +2,24 @@ R"===( ...@@ -2,49 +2,24 @@ R"===(
#ifndef FAST_DIV_HEAVY_CL #ifndef FAST_DIV_HEAVY_CL
#define FAST_DIV_HEAVY_CL #define FAST_DIV_HEAVY_CL
inline ulong get_reciprocal_heavy(uint a) inline long fast_div_heavy(long _a, int _b)
{ {
const uint shift = clz(a); long a = abs(_a);
a <<= shift; int b = abs(_b);
const float a_hi = as_float((a >> 8) + 1 + ((126U + 31U) << 23));
const float a_lo = convert_float_rte(as_int(a & 0xFF) - 256);
const float r = native_recip(a_hi);
const uint tmp0 = as_uint(r);
const uint tmp1 = tmp0 + ((shift + 2 + 64U) << 23);
const float r_scaled = as_float(tmp1);
const float h = fma(a_lo, r, fma(a_hi, r, -1.0f));
const float r_scaled_hi = as_float(tmp1 & ~4095U);
const float h_hi = as_float(as_uint(h) & ~4095U);
const float r_scaled_lo = r_scaled - r_scaled_hi; float rcp = native_recip(convert_float_rte(b));
const float h_lo = h - h_hi; float rcp2 = as_float(as_uint(rcp) + (32U << 23));
const float x1 = h_hi * r_scaled_hi; ulong q1 = convert_ulong_rte(convert_float_rte(as_int2(a).s1) * rcp2);
const float x2 = h_lo * r_scaled + h_hi * r_scaled_lo; a -= q1 * as_uint(b);
const long h1 = convert_long_rte(x1); long q2 = convert_long_rte(convert_float_rtn(a) * rcp);
const int h2 = convert_int_rtp(x2) - convert_int_rtn(h * (x1 + x2)); int a2 = as_int2(a).s0 - as_int2(q2).s0 * b;
const ulong result = tmp0 & 0xFFFFFF;
return (result << (shift + 9)) - ((h1 + h2) >> 2);
}
inline long fast_div_heavy(long _a, int _b)
{
const ulong a = abs(_a);
const uint b = abs(_b);
ulong q = mul_hi(a, get_reciprocal_heavy(b));
const long tmp = a - q * b; int q3 = convert_int_rte(convert_float_rte(a2) * rcp);
const int overshoot = (tmp < 0) ? 1 : 0; q3 += (a2 - q3 * b) >> 31;
const int undershoot = (tmp >= b) ? 1 : 0;
q += undershoot - overshoot;
const long q = q1 + q2 + q3;
return ((as_int2(_a).s1 ^ _b) < 0) ? -q : q; return ((as_int2(_a).s1 ^ _b) < 0) ? -q : q;
} }
......
...@@ -125,3 +125,4 @@ inline uint fast_sqrt_v2(const ulong n1) ...@@ -125,3 +125,4 @@ inline uint fast_sqrt_v2(const ulong n1)
#endif #endif
)===" )==="
\ No newline at end of file
...@@ -60,11 +60,29 @@ std::vector<iBackend*>* BackendConnector::thread_starter(miner_work& pWork) ...@@ -60,11 +60,29 @@ std::vector<iBackend*>* BackendConnector::thread_starter(miner_work& pWork)
std::vector<iBackend*>* pvThreads = new std::vector<iBackend*>; std::vector<iBackend*>* pvThreads = new std::vector<iBackend*>;
#ifndef CONF_NO_OPENCL
if(params::inst().useAMD)
{
const std::string backendName = xmrstak::params::inst().openCLVendor;
plugin amdplugin;
amdplugin.load(backendName, "xmrstak_opencl_backend");
std::vector<iBackend*>* amdThreads = amdplugin.startBackend(static_cast<uint32_t>(pvThreads->size()), pWork, environment::inst());
size_t numWorkers = 0u;
if(amdThreads != nullptr)
{
pvThreads->insert(std::end(*pvThreads), std::begin(*amdThreads), std::end(*amdThreads));
numWorkers = amdThreads->size();
delete amdThreads;
}
if(numWorkers == 0)
printer::inst()->print_msg(L0, "WARNING: backend %s (OpenCL) disabled.", backendName.c_str());
}
#endif
#ifndef CONF_NO_CUDA #ifndef CONF_NO_CUDA
if(params::inst().useNVIDIA) if(params::inst().useNVIDIA)
{ {
plugin nvidiaplugin; plugin nvidiaplugin;
std::vector<iBackend*>* nvidiaThreads;
std::vector<std::string> libNames = {"xmrstak_cuda_backend_cuda10_0", "xmrstak_cuda_backend_cuda9_2", "xmrstak_cuda_backend"}; std::vector<std::string> libNames = {"xmrstak_cuda_backend_cuda10_0", "xmrstak_cuda_backend_cuda9_2", "xmrstak_cuda_backend"};
size_t numWorkers = 0u; size_t numWorkers = 0u;
...@@ -96,25 +114,6 @@ std::vector<iBackend*>* BackendConnector::thread_starter(miner_work& pWork) ...@@ -96,25 +114,6 @@ std::vector<iBackend*>* BackendConnector::thread_starter(miner_work& pWork)
} }
#endif #endif
#ifndef CONF_NO_OPENCL
if(params::inst().useAMD)
{
const std::string backendName = xmrstak::params::inst().openCLVendor;
plugin amdplugin;
amdplugin.load(backendName, "xmrstak_opencl_backend");
std::vector<iBackend*>* amdThreads = amdplugin.startBackend(static_cast<uint32_t>(pvThreads->size()), pWork, environment::inst());
size_t numWorkers = 0u;
if(amdThreads != nullptr)
{
pvThreads->insert(std::end(*pvThreads), std::begin(*amdThreads), std::end(*amdThreads));
numWorkers = amdThreads->size();
delete amdThreads;
}
if(numWorkers == 0)
printer::inst()->print_msg(L0, "WARNING: backend %s (OpenCL) disabled.", backendName.c_str());
}
#endif
#ifndef CONF_NO_CPU #ifndef CONF_NO_CPU
if(params::inst().useCPU) if(params::inst().useCPU)
{ {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment