Commit e5127b5f authored by psychocrypt

add message to `CUDA_CHECK...` macros

- add macro `CUDA_CHECK_MSG_KERNEL` and `CUDA_CHECK_MSG`
- add suggestions for how typical errors can be solved
parent 035c8242
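For context, a minimal sketch of how the new macro is meant to be used at a call site (it mirrors the allocation guarded in this commit and assumes CUDA_CHECK_MSG and a valid nvid_ctx* ctx are in scope): the extra msg argument is printed right after the "[CUDA] Error gpu ..." line before the exception is thrown.

	// Illustrative call site (assumes CUDA_CHECK_MSG and a valid nvid_ctx* ctx are in scope):
	// on failure the hint is appended to the error output, then a std::runtime_error is thrown.
	CUDA_CHECK_MSG(
		ctx->device_id,
		"\n**suggestion: Try to reduce the value of the attribute 'threads' in the NVIDIA config file.**",
		cudaMalloc(&ctx->d_long_state, hashMemSize * wsize));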
@@ -327,18 +327,22 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx)
 	for ( int i = 0; i < partcount; i++ )
 	{
-		CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase2<ITERATIONS,THREAD_SHIFT,MASK><<<
-			grid,
-			block4,
-			block4.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 )
-		>>>(
-			ctx->device_blocks*ctx->device_threads,
-			ctx->device_bfactor,
-			i,
-			ctx->d_long_state,
-			ctx->d_ctx_a,
-			ctx->d_ctx_b
-		));
+		CUDA_CHECK_MSG_KERNEL(
+			ctx->device_id,
+			"\n**suggestion: Try to increase the value of the attribute 'bfactor' or \nreduce 'threads' in the NVIDIA config file.**",
+			cryptonight_core_gpu_phase2<ITERATIONS,THREAD_SHIFT,MASK><<<
+				grid,
+				block4,
+				block4.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 )
+			>>>(
+				ctx->device_blocks*ctx->device_threads,
+				ctx->device_bfactor,
+				i,
+				ctx->d_long_state,
+				ctx->d_ctx_a,
+				ctx->d_ctx_b
+			)
+		);

 		if ( partcount > 1 && ctx->device_bsleep > 0) compat_usleep( ctx->device_bsleep );
 	}
@@ -9,22 +9,41 @@
 /** execute and check a CUDA api command
  *
  * @param id gpu id (thread id)
+ * @param msg message string which should be added to the error message
  * @param ... CUDA api command
  */
-#define CUDA_CHECK(id, ...) { \
+#define CUDA_CHECK_MSG(id, msg, ...) { \
 	cudaError_t error = __VA_ARGS__; \
 	if(error!=cudaSuccess){ \
-		std::cerr << "[CUDA] Error gpu " << id << ": <" << __FILE__ << ">:" << __LINE__ << std::endl; \
+		std::cerr << "[CUDA] Error gpu " << id << ": <" << __FILE__ << ">:" << __LINE__; \
+		std::cerr << msg << std::endl; \
 		throw std::runtime_error(std::string("[CUDA] Error: ") + std::string(cudaGetErrorString(error))); \
 	} \
 } \
 ( (void) 0 )

+/** execute and check a CUDA api command
+ *
+ * @param id gpu id (thread id)
+ * @param ... CUDA api command
+ */
+#define CUDA_CHECK(id, ...) CUDA_CHECK_MSG(id, "", __VA_ARGS__)
+
 /** execute and check a CUDA kernel
  *
  * @param id gpu id (thread id)
  * @param ... CUDA kernel call
  */
 #define CUDA_CHECK_KERNEL(id, ...) \
 	__VA_ARGS__; \
 	CUDA_CHECK(id, cudaGetLastError())
+
+/** execute and check a CUDA kernel
+ *
+ * @param id gpu id (thread id)
+ * @param msg message string which should be added to the error message
+ * @param ... CUDA kernel call
+ */
+#define CUDA_CHECK_MSG_KERNEL(id, msg, ...) \
+	__VA_ARGS__; \
+	CUDA_CHECK_MSG(id, msg, cudaGetLastError())
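As a standalone illustration (not part of the commit), the sketch below combines the two new macros in a minimal .cu program: CUDA_CHECK_MSG guards an ordinary runtime API call, while CUDA_CHECK_MSG_KERNEL performs the launch first and then checks cudaGetLastError(), since a kernel launch does not return a cudaError_t itself. The dummy kernel, buffer size, and hint texts are purely illustrative.

// Minimal, illustrative .cu sketch showing the new macros (copied from the diff above).
#include <cuda_runtime.h>
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>

#define CUDA_CHECK_MSG(id, msg, ...) { \
	cudaError_t error = __VA_ARGS__; \
	if(error!=cudaSuccess){ \
		std::cerr << "[CUDA] Error gpu " << id << ": <" << __FILE__ << ">:" << __LINE__; \
		std::cerr << msg << std::endl; \
		throw std::runtime_error(std::string("[CUDA] Error: ") + std::string(cudaGetErrorString(error))); \
	} \
} \
( (void) 0 )

#define CUDA_CHECK(id, ...) CUDA_CHECK_MSG(id, "", __VA_ARGS__)

#define CUDA_CHECK_MSG_KERNEL(id, msg, ...) \
	__VA_ARGS__; \
	CUDA_CHECK_MSG(id, msg, cudaGetLastError())

// Dummy kernel used only for this illustration.
__global__ void fill(uint32_t* out, uint32_t value, unsigned int n)
{
	unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
	if(i < n) out[i] = value;
}

int main()
{
	const int device_id = 0;          // illustrative gpu id
	const unsigned int n = 1024;      // illustrative buffer size
	uint32_t* d_buf = nullptr;

	// Plain API call with a custom hint appended to any error message.
	CUDA_CHECK_MSG(
		device_id,
		"\n**suggestion: Try a smaller buffer size.**",
		cudaMalloc(&d_buf, n * sizeof(uint32_t)));

	// Kernel launch: the macro runs the launch, then checks cudaGetLastError().
	CUDA_CHECK_MSG_KERNEL(
		device_id,
		"\n**suggestion: Try a smaller grid or block size.**",
		fill<<<(n + 255) / 256, 256>>>(d_buf, 42u, n));

	CUDA_CHECK(device_id, cudaDeviceSynchronize());
	CUDA_CHECK(device_id, cudaFree(d_buf));
	return 0;
}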
...@@ -203,7 +203,6 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) ...@@ -203,7 +203,6 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
} }
size_t wsize = ctx->device_blocks * ctx->device_threads; size_t wsize = ctx->device_blocks * ctx->device_threads;
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_long_state, hashMemSize * wsize));
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state, 50 * sizeof(uint32_t) * wsize)); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state, 50 * sizeof(uint32_t) * wsize));
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key1, 40 * sizeof(uint32_t) * wsize)); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key1, 40 * sizeof(uint32_t) * wsize));
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key2, 40 * sizeof(uint32_t) * wsize)); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key2, 40 * sizeof(uint32_t) * wsize));
...@@ -213,6 +212,10 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) ...@@ -213,6 +212,10 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_input, 21 * sizeof (uint32_t ) )); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_input, 21 * sizeof (uint32_t ) ));
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_count, sizeof (uint32_t ) )); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_count, sizeof (uint32_t ) ));
CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_nonce, 10 * sizeof (uint32_t ) )); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_nonce, 10 * sizeof (uint32_t ) ));
CUDA_CHECK_MSG(
ctx->device_id,
"\n**suggestion: Try to reduce the value of the attribute 'threads' in the NVIDIA config file.**",
cudaMalloc(&ctx->d_long_state, hashMemSize * wsize));
return 1; return 1;
} }
@@ -239,7 +242,11 @@ extern "C" void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce,
 	CUDA_CHECK(ctx->device_id, cudaMemset( ctx->d_result_nonce, 0xFF, 10 * sizeof (uint32_t ) ));
 	CUDA_CHECK(ctx->device_id, cudaMemset( ctx->d_result_count, 0, sizeof (uint32_t ) ));

-	CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_final<<<grid, block >>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state ));
+	CUDA_CHECK_MSG_KERNEL(
+		ctx->device_id,
+		"\n**suggestion: Try to increase the value of the attribute 'bfactor' in the NVIDIA config file.**",
+		cryptonight_extra_gpu_final<<<grid, block >>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state )
+	);

 	CUDA_CHECK(ctx->device_id, cudaMemcpy( rescount, ctx->d_result_count, sizeof (uint32_t ), cudaMemcpyDeviceToHost ));
 	CUDA_CHECK(ctx->device_id, cudaMemcpy( resnonce, ctx->d_result_nonce, 10 * sizeof (uint32_t ), cudaMemcpyDeviceToHost ));