Unverified commit 945524b3, authored by fireice-uk, committed by GitHub

Merge pull request #1300 from psychocrypt/fix-sumokoinNvidiaBfactor

NVIDIA: fix sumokoin
parents 856e2d06 5f128134
@@ -352,7 +352,7 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti
     int sub = subv << 2;

     const int batchsize = MEMORY >> bfactor;
-    const int start = partidx * batchsize;
+    const int start = (partidx % (1 << bfactor)) * batchsize;
     const int end = start + batchsize;

     if ( thread >= threads )
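For context, a minimal standalone sketch (plain C++; the constants are illustrative, not xmr-stak's values) of why the wrap-around is needed: with this change the host launches phase 3 for twice as many partitions when ALGO is cryptonight_heavy (see the last cuda_core.cu hunk below), so partidx can exceed the number of scratchpad windows and has to be folded back into [0, 1 << bfactor).

```cpp
// Why the modulo matters: cryptonight_heavy now runs 2 * (1 << bfactor)
// phase-3 launches (two full passes over the scratchpad), so partidx must be
// wrapped before computing the window start.  MEMORY and bfactor here are
// illustrative values only.
#include <cstdio>

int main()
{
    const int MEMORY    = 1 << 19;            // illustrative per-thread scratchpad size (words)
    const int bfactor   = 3;                  // scratchpad split into 1 << bfactor windows
    const int batchsize = MEMORY >> bfactor;
    const int rounds    = 2 * (1 << bfactor); // cryptonight_heavy: two full passes

    for(int partidx = 0; partidx < rounds; ++partidx)
    {
        const int unwrapped = partidx * batchsize;                    // runs past MEMORY in the 2nd pass
        const int wrapped   = (partidx % (1 << bfactor)) * batchsize; // stays inside the scratchpad
        printf("partidx %2d  unwrapped start %7d%s  wrapped start %7d\n",
               partidx, unwrapped,
               unwrapped + batchsize > MEMORY ? " (out of range)" : "",
               wrapped);
    }
    return 0;
}
```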
@@ -365,15 +365,15 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti
     __syncthreads( );

 #if( __CUDA_ARCH__ < 300 )
     extern __shared__ uint32_t shuffleMem[];
-    volatile uint32_t* sPtr = (volatile uint32_t*)(shuffleMem + (threadIdx.x& 0xFFFFFFFC));
+    volatile uint32_t* sPtr = (volatile uint32_t*)(shuffleMem + (threadIdx.x& 0xFFFFFFF8));
 #else
     volatile uint32_t* sPtr = NULL;
 #endif

     for ( int i = start; i < end; i += 32 )
     {
 #pragma unroll
         for ( int j = 0; j < 4; ++j )
             text[j] ^= long_state[((IndexType) thread * MEMORY) + ( sub + i + j)];
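The mask change above concerns the pre-Kepler path only. Below is a hedged sketch of how a shared-memory shuffle fallback of this kind works (helper and kernel names are illustrative, not xmr-stak's exact code): cryptonight_heavy shuffles across groups of eight lanes via shuffle<8>, so each thread's window base must be aligned to eight threads. Masking threadIdx.x with 0xFFFFFFF8 clears the low three bits, whereas the old 0xFFFFFFFC only aligned to four and let neighbouring four-lane groups overwrite each other's slots.

```cpp
// Illustrative shared-memory shuffle fallback for GPUs without __shfl
// (compute capability < 3.0).  It relies on warp-synchronous execution,
// which is the assumption the pre-Kepler code path makes.
#include <cuda_runtime.h>
#include <cstdint>

template<uint32_t GROUP_N>
__device__ uint32_t shuffle_via_smem(volatile uint32_t* sPtr, uint32_t sub, uint32_t value, uint32_t src)
{
    sPtr[sub] = value;                // each lane publishes its value into the group's window
    return sPtr[src & (GROUP_N - 1)]; // then reads back the requested lane's slot
}

__global__ void rotate_within_groups_of_8(uint32_t* out)
{
    extern __shared__ uint32_t shuffleMem[];      // one slot per thread, sized at launch
    const uint32_t subv = threadIdx.x & 7;        // lane index inside the 8-thread group
    // Group base: clear the low 3 bits.  0xFFFFFFFC would only clear 2 bits,
    // so two neighbouring 4-lane groups would share (and corrupt) one window.
    volatile uint32_t* sPtr = shuffleMem + (threadIdx.x & 0xFFFFFFF8);
    out[threadIdx.x] = shuffle_via_smem<8>(sPtr, subv, threadIdx.x, (subv + 1) & 7);
}

// Usage: rotate_within_groups_of_8<<<1, 32, 32 * sizeof(uint32_t)>>>(d_out);
// afterwards each thread holds the id of the next lane in its 8-thread group.
```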
@@ -381,25 +381,7 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti
         if(ALGO == cryptonight_heavy)
         {
 #pragma unroll
-            for ( int j = 0; j < 4; ++j )
-                text[j] ^= shuffle<8>(sPtr, subv, text[j], (subv+1)&7);
-        }
-    }
-
-    if(ALGO == cryptonight_heavy)
-    {
-        __syncthreads( );
-
-        for ( int i = start; i < end; i += 32 )
-        {
-#pragma unroll
-            for ( int j = 0; j < 4; ++j )
-                text[j] ^= long_state[((IndexType) thread * MEMORY) + ( sub + i + j)];
-
-            cn_aes_pseudo_round_mut( sharedMemory, text, key );
-
-#pragma unroll
             for ( int j = 0; j < 4; ++j )
                 text[j] ^= shuffle<8>(sPtr, subv, text[j], (subv+1)&7);
         }
@@ -466,9 +448,21 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce)
         if ( partcount > 1 && ctx->device_bsleep > 0) compat_usleep( ctx->device_bsleep );
     }

-    for ( int i = 0; i < partcountOneThree; i++ )
+    int roundsPhase3 = partcountOneThree;
+
+    if(ALGO == cryptonight_heavy)
+    {
+        // cryptonight_heavy used two full rounds over the scratchpad memory
+        roundsPhase3 *= 2;
+    }
+
+    for ( int i = 0; i < roundsPhase3; i++ )
     {
-        CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ITERATIONS,MEMORY, ALGO><<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads,
+        CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ITERATIONS,MEMORY, ALGO><<<
+            grid,
+            block8,
+            block8.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 )
+        >>>( ctx->device_blocks*ctx->device_threads,
             bfactorOneThree, i,
             ctx->d_long_state,
             ctx->d_ctx_state, ctx->d_ctx_key2 ));
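Two things happen in this host-side hunk: the second pass over the scratchpad that was removed from the kernel above is now driven by simply doubling the number of phase-3 launches for cryptonight_heavy, and the kernel launch gains a third <<<>>> argument reserving dynamic shared memory (one uint32_t per thread of the block) only when ctx->device_arch[0] < 3, i.e. only for the pre-Kepler shuffle fallback. A self-contained sketch of that launch pattern follows; all identifiers in it are illustrative, only the pattern mirrors the change.

```cpp
// Self-contained sketch of launching a kernel with an optional dynamic
// shared-memory reservation, as in the hunk above.
#include <cuda_runtime.h>
#include <cstdint>

__global__ void needs_optional_smem(uint32_t* out)
{
    extern __shared__ uint32_t shuffleMem[]; // size comes from the launch; may be 0 bytes
#if( __CUDA_ARCH__ < 300 )
    shuffleMem[threadIdx.x] = threadIdx.x;   // the pre-Kepler path actually uses it
    out[threadIdx.x] = shuffleMem[threadIdx.x];
#else
    out[threadIdx.x] = threadIdx.x;          // sm_30+ would use __shfl and ignore it
#endif
}

int main()
{
    cudaDeviceProp prop{};
    cudaGetDeviceProperties(&prop, 0);

    const dim3 block8(8);
    uint32_t* d_out = nullptr;
    cudaMalloc(&d_out, block8.x * sizeof(uint32_t));

    // Third launch parameter: dynamic shared memory in bytes, reserved only on
    // pre-Kepler devices (compute capability major version < 3).
    const size_t smem = block8.x * sizeof(uint32_t) * static_cast<int>(prop.major < 3);
    needs_optional_smem<<<1, block8, smem>>>(d_out);
    cudaDeviceSynchronize();

    cudaFree(d_out);
    return 0;
}
```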
...
@@ -18,7 +18,7 @@
 #endif

 #define XMR_STAK_NAME "xmr-stak"
-#define XMR_STAK_VERSION "2.4.1"
+#define XMR_STAK_VERSION "2.4.2"

 #if defined(_WIN32)
 #define OS_TYPE "win"
...