Skip to content
Snippets Groups Projects
Commit c528f51a authored by psychocrypt's avatar psychocrypt
Browse files

speedup Volta

- enable L1 cache for Nvidia Volta GPUs and newer
- remove explicit cache control for Volta GPUs and newer

This pull request increases the hash rate for Volta GPUs by ~5%
parent a8e83eec
No related branches found
No related tags found
No related merge requests found
......@@ -74,24 +74,36 @@ __device__ __forceinline__ uint64_t cuda_mul128( uint64_t multiplier, uint64_t m
/** Read one 64-bit value from global memory.
 *
 * When compiling for compute capability 7.0 or newer the value is fetched
 * with a plain dereference, so no explicit cache operator is applied.
 * For every older target the load is emitted as PTX `ld.global.cg.u64`
 * (the `.cg` operator caches at the global/L2 level only).
 *
 * @tparam T type of the value; in the PTX path it is bound to a 64-bit
 *           register operand ("l" constraint)
 * @param addr address of the value to read
 * @return the value stored at `addr`
 */
template< typename T >
__device__ __forceinline__ T loadGlobal64( T * const addr )
{
#if (__CUDA_ARCH__ >= 700)
	// Volta and newer: let the compiler choose the load instruction
	return *addr;
#else
	// pre-Volta: explicit load with the .cg cache operator
	T value;
	asm volatile( "ld.global.cg.u64 %0, [%1];" : "=l"( value ) : "l"( addr ) );
	return value;
#endif
}
/** Read one 32-bit value from global memory.
 *
 * When compiling for compute capability 7.0 or newer the value is fetched
 * with a plain dereference, so no explicit cache operator is applied.
 * For every older target the load is emitted as PTX `ld.global.cg.u32`
 * (the `.cg` operator caches at the global/L2 level only).
 *
 * @tparam T type of the value; in the PTX path it is bound to a 32-bit
 *           register operand ("r" constraint)
 * @param addr address of the value to read
 * @return the value stored at `addr`
 */
template< typename T >
__device__ __forceinline__ T loadGlobal32( T * const addr )
{
#if (__CUDA_ARCH__ >= 700)
	// Volta and newer: let the compiler choose the load instruction
	return *addr;
#else
	// pre-Volta: explicit load with the .cg cache operator
	T value;
	asm volatile( "ld.global.cg.u32 %0, [%1];" : "=r"( value ) : "l"( addr ) );
	return value;
#endif
}
/** Write one 32-bit value to global memory.
 *
 * When compiling for compute capability 7.0 or newer the value is written
 * with a plain assignment, so no explicit cache operator is applied.
 * For every older target the store is emitted as PTX `st.global.cg.u32`
 * (the `.cg` operator caches at the global/L2 level only).
 *
 * @tparam T type of the value; in the PTX path it is bound to a 32-bit
 *           register operand ("r" constraint)
 * @param addr destination address
 * @param val value that is written to `addr`
 */
template< typename T >
__device__ __forceinline__ void storeGlobal32( T* addr, T const & val )
{
#if (__CUDA_ARCH__ >= 700)
	// Volta and newer: let the compiler choose the store instruction
	*addr = val;
#else
	// pre-Volta: explicit store with the .cg cache operator
	asm volatile( "st.global.cg.u32 [%0], %1;" : : "l"( addr ), "r"( val ) );
#endif
}
template<size_t ITERATIONS, uint32_t THREAD_SHIFT>
......
......@@ -204,7 +204,13 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
break;
};
CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
const int gpuArch = ctx->device_arch[0] * 10 + ctx->device_arch[1];
/* Disable L1 cache for GPUs before Volta.
* L1 speed is increased and latency reduced with Volta.
*/
if(gpuArch < 70)
CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
size_t hashMemSize;
if(::jconf::inst()->IsCurrencyMonero())
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment