Skip to content
Snippets Groups Projects
Unverified Commit b1d8b550 authored by fireice-uk's avatar fireice-uk Committed by GitHub
Browse files

Merge pull request #2113 from psychocrypt/fix-OpenCLNvidia

OpenCl: fix NVIDIA
parents 35fb646c ab19d370
No related branches found
No related tags found
No related merge requests found
......@@ -421,7 +421,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
options += " -DSTRIDED_INDEX=" + std::to_string(strided_index);
options += " -DMEM_CHUNK_EXPONENT=" + std::to_string(mem_chunk_exp) + "U";
options += " -DCOMP_MODE=" + std::to_string(needCompMode);
options += " -DMEMORY=" + std::to_string(hashMemSize) + "LLU";
options += " -DMEMORY=" + std::to_string(hashMemSize) + "LU";
options += " -DALGO=" + std::to_string(miner_algo[ii]);
options += " -DCN_UNROLL=" + std::to_string(ctx->unroll);
/* AMD driver output is something like: `1445.5 (VM)`
......@@ -1276,7 +1276,7 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo)
if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->OutputBuffer, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(cl_uint), &zero, 0, NULL, NULL)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret));
printer::inst()->print_msg(L1,"Error %s when calling clEnqueueWriteBuffer to fetch results.", err_to_str(ret));
return ERR_OCL_API;
}
......
......@@ -426,8 +426,13 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad,
if (get_local_id(1) == 0)
{
__local ulong* State = State_buf + get_local_id(0) * 25;
// NVIDIA
#ifdef __NV_CL_C_VERSION
for(uint i = 0; i < 8; ++i)
State[i] = input[i];
#else
((__local ulong8 *)State)[0] = vload8(0, input);
#endif
State[8] = input[8];
State[9] = input[9];
State[10] = input[10];
......@@ -477,7 +482,7 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad,
mem_fence(CLK_LOCAL_MEM_FENCE);
// cryptonight_heavy || cryptonight_haven || cryptonight_bittube2 || cryptonight_superfast
// cryptonight_heavy || cryptonight_haven || cryptonight_bittube2 || cryptonight_superfast
#if (ALGO == 4 || ALGO == 9 || ALGO == 10 || ALGO == 12)
__local uint4 xin[8][8];
{
......@@ -567,7 +572,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
// cryptonight_monero_v8
#if(ALGO==11)
# ifdef __clang__
# if defined(__clang__) && !defined(__NV_CL_C_VERSION)
__local uint RCP[256];
# endif
......@@ -582,7 +587,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
AES0[i] = tmp;
AES1[i] = rotate(tmp, 8U);
// cryptonight_monero_v8
#if(ALGO==11 && defined(__clang__))
#if(ALGO==11 && (defined(__clang__) && !defined(__NV_CL_C_VERSION)))
RCP[i] = RCP_C[i];
#endif
}
......@@ -718,7 +723,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
// Quotient may be as large as (2^64 - 1)/(2^31 + 1) = 8589934588 = 2^33 - 4
// We drop the highest bit to fit both quotient and remainder in 32 bits
# ifdef __clang__
# if defined(__clang__) && !defined(__NV_CL_C_VERSION)
division_result = fast_div_v2(RCP, c[1], d);
# else
division_result = fast_div_v2(c[1], d);
......
......@@ -43,7 +43,7 @@ static const __constant uint RCP_C[256] =
};
// ROCm produces invalid results if get_reciprocal is used without the lookup table
#ifdef __clang__
#if defined(__clang__) && !defined(__NV_CL_C_VERSION)
inline uint get_reciprocal(const __local uchar *RCP, uint a)
{
......@@ -83,7 +83,7 @@ inline uint get_reciprocal(uint a)
#endif
#ifdef __clang__
#if defined(__clang__) && !defined(__NV_CL_C_VERSION)
inline uint2 fast_div_v2(const __local uint *RCP, ulong a, uint b)
{
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment