Commit 990c357c authored by psychocrypt

improve auto suggestion

- amd/nvidia: add a hard limit so that 64MiB of device memory is kept free (see the sketch below)
- improve the nvidia auto suggestion (take lmem/local memory into account)
- add the c++11 flag to the nvidia compiler
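
For reference, a minimal sketch of the new memory budget rule shared by both backends. The helper name `usableMemory` and the underflow guard are illustrative only, not part of the patch; the patch subtracts the reserve inline:

    #include <cstddef>

    // Illustrative only: both backends now subtract a fixed 64MiB reserve
    // from the free device memory instead of scaling it by a percentage.
    inline size_t usableMemory(size_t freeMem)
    {
        const size_t reserve = size_t(64u) * 1024u * 1024u; // hard 64MiB headroom
        return freeMem > reserve ? freeMem - reserve : 0u;  // guard against underflow
    }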
parent 9e7efd40
@@ -108,6 +108,11 @@ if(CUDA_ENABLE)
         endforeach()
     elseif("${CUDA_COMPILER}" STREQUAL "nvcc")
+        # add c++11 for cuda
+        if(NOT "${CMAKE_CXX_FLAGS}" MATCHES "-std=c\\+\\+11")
+            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+        endif()
+
         # avoid that nvcc in CUDA < 8 tries to use libc `memcpy` within the kernel
         if(CUDA_VERSION VERSION_LESS 8.0)
             add_definitions(-D_FORCE_INLINES)
@@ -83,13 +83,15 @@ private:
         int i = 0;
         for(auto& ctx : devVec)
         {
-            // use 90% of available memory
-            size_t availableMem = (ctx.freeMem * 100u) / 110;
             size_t units = ctx.computeUnits;
+            // keep 64MiB memory free (value is randomly chosen)
+            size_t availableMem = ctx.freeMem - (64u * 1024 * 1024);
+            // 224 bytes of extra memory are used per thread for meta data
             size_t perThread = (size_t(1u)<<21) + 224u;
             size_t max_intensity = availableMem / perThread;
+            // 1000 is a magic limit \todo select the max intensity depending on the gpu type
             size_t intensity = std::min( size_t(1000u) , max_intensity );

             conf += std::string(" // gpu: ") + ctx.name + "\n";
             // set 8 threads per block (this is a good value for most gpus)
             conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" +
                 " \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" +
                 " \"affine_to_cpu\" : false, \n"
@@ -101,7 +103,7 @@ private:
         configTpl.replace("NUMGPUS",std::to_string(devVec.size()));
         configTpl.replace("GPUCONFIG",conf);
         configTpl.write("amd.txt");
-        printer::inst()->print_msg(L0, "CPU configuration stored in file '%s'", "amd.txt");
+        printer::inst()->print_msg(L0, "AMD: GPU configuration stored in file '%s'", "amd.txt");
     }

     std::vector<GpuContext> devVec;
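
Plugging assumed numbers into the intensity formula above, for a hypothetical card reporting 4GiB of free memory:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    // Hypothetical walk-through of the AMD intensity suggestion above.
    int main()
    {
        size_t freeMem       = size_t(4096u) * 1024u * 1024u;  // assume 4GiB reported free
        size_t availableMem  = freeMem - (64u * 1024 * 1024);  // 4032MiB after the 64MiB reserve
        size_t perThread     = (size_t(1u)<<21) + 224u;        // 2MiB scratchpad + 224 bytes meta data
        size_t max_intensity = availableMem / perThread;       // 2015
        size_t intensity     = std::min( size_t(1000u), max_intensity ); // clamped to 1000
        std::printf("suggested intensity: %zu\n", intensity);
        return 0;
    }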
@@ -85,11 +85,13 @@ private:
         ConfigEditor configTpl{};
         configTpl.set( std::string(tpl) );

+        constexpr size_t byte2mib = 1024u * 1024u;
         std::string conf;
         int i = 0;
         for(auto& ctx : nvidCtxVec)
         {
             conf += std::string(" // gpu: ") + ctx.name + " architecture: " + std::to_string(ctx.device_arch[0] * 10 + ctx.device_arch[1]) + "\n";
+            conf += std::string(" // memory: ") + std::to_string(ctx.free_device_memory / byte2mib) + "/" + std::to_string(ctx.total_device_memory / byte2mib) + " MiB\n";
             conf += std::string(" { \"index\" : ") + std::to_string(ctx.device_id) + ",\n" +
                 " \"threads\" : " + std::to_string(ctx.device_threads) + ", \"blocks\" : " + std::to_string(ctx.device_blocks) + ",\n" +
                 " \"bfactor\" : " + std::to_string(ctx.device_bfactor) + ", \"bsleep\" : " + std::to_string(ctx.device_bsleep) + ",\n" +
@@ -100,7 +102,7 @@ private:
         configTpl.replace("GPUCONFIG",conf);
         configTpl.write("nvidia.txt");
-        printer::inst()->print_msg(L0, "CPU configuration stored in file '%s'", "nvidia.txt");
+        printer::inst()->print_msg(L0, "NVIDIA: GPU configuration stored in file '%s'", "nvidia.txt");
     }

     std::vector<nvid_ctx> nvidCtxVec;
 #pragma once

 #include <stdint.h>
+#include <string>

 typedef struct {
     int device_id;
@@ -23,6 +24,9 @@ typedef struct {
     uint32_t *d_ctx_key1;
     uint32_t *d_ctx_key2;
     uint32_t *d_ctx_text;
+    std::string name;
+    size_t free_device_memory;
+    size_t total_device_memory;
 } nvid_ctx;

 extern "C" {
@@ -4,6 +4,7 @@
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <device_functions.hpp>
+#include <algorithm>

 #ifdef __CUDACC__
 __constant__
@@ -301,6 +302,8 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
     ctx->device_arch[0] = props.major;
     ctx->device_arch[1] = props.minor;
+    ctx->name = std::string(props.name);
+
     // set all device options marked as auto (-1) to a valid value
     if(ctx->device_blocks == -1)
     {
@@ -318,27 +321,46 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
          * `8 * ctx->device_threads` threads per block
          */
         ctx->device_threads = 64;
+        constexpr size_t byte2mib = 1024u * 1024u;

+        // no limit by default (1TiB)
+        size_t maxMemUsage = byte2mib * byte2mib;
         if(props.major < 6)
         {
-            // try to stay under 950 threads (1900MiB of memory for the hashes)
-            while(ctx->device_blocks * ctx->device_threads >= 950 && ctx->device_threads > 2)
-            {
-                ctx->device_threads /= 2;
-            }
+            // limit memory usage for GPUs before pascal
+            maxMemUsage = size_t(2048u) * byte2mib;
         }
+        if(props.major == 2)
+        {
+            // limit memory usage for sm 20 GPUs
+            maxMemUsage = size_t(1024u) * byte2mib;
+        }

-        // stay within 85% of the available RAM
-        while(ctx->device_threads > 2)
-        {
-            size_t freeMemory = 0;
-            size_t totalMemory = 0;
-            CUDA_CHECK(ctx->device_id, cudaMemGetInfo(&freeMemory, &totalMemory));
-            freeMemory = (freeMemory * size_t(85)) / 100;
-            if( freeMemory > (size_t(ctx->device_blocks) * size_t(ctx->device_threads) * size_t(2u * 1024u * 1024u)) )
-                break;
-            else
-                ctx->device_threads /= 2;
-        }
+        size_t freeMemory = 0;
+        size_t totalMemory = 0;
+        CUDA_CHECK(ctx->device_id, cudaMemGetInfo(&freeMemory, &totalMemory));
+        ctx->total_device_memory = totalMemory;
+        ctx->free_device_memory = freeMemory;
+
+        // keep 64MiB memory free (value is randomly chosen), plus
+        // 200 bytes of meta data memory (result nonce, ...)
+        size_t availableMem = freeMemory - (64u * 1024 * 1024) - 200u;
+        size_t limitedMemory = std::min(availableMem, maxMemUsage);
+        // up to 920 bytes of extra memory are used per thread by some kernels (lmem/local memory)
+        // 680 bytes are extra meta data memory per hash
+        size_t perThread = size_t(MEMORY) + 740u + 680u;
+        size_t max_intensity = limitedMemory / perThread;
+        ctx->device_threads = max_intensity / ctx->device_blocks;
+        // use only an even number of threads
+        ctx->device_threads = ctx->device_threads & 0xFFFFFFFE;
+
+        if(props.major == 2 && ctx->device_threads > 64)
+        {
+            // Fermi gpus only support 512 threads per block (the kernel is started with 4 * the configured threads)
+            ctx->device_threads = 64;
+        }
     }

     return 1;
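
To make the new nvidia heuristic easier to follow, here is a self-contained sketch of the same computation. The function name `suggestThreads` and its scalar parameters are hypothetical; the real code reads these values from `nvid_ctx` and `cudaDeviceProp`:

    #include <algorithm>
    #include <cstddef>

    // Hypothetical standalone version of the auto-suggestion above.
    // smMajor:    compute capability major version (props.major)
    // freeMemory: free device memory in bytes (from cudaMemGetInfo)
    // blocks:     the configured ctx->device_blocks
    // hashMemory: per-hash scratchpad size (MEMORY, 2MiB for cryptonight)
    size_t suggestThreads(int smMajor, size_t freeMemory, size_t blocks, size_t hashMemory)
    {
        constexpr size_t byte2mib = 1024u * 1024u;
        size_t maxMemUsage = byte2mib * byte2mib;      // no limit by default (1TiB)
        if(smMajor < 6)
            maxMemUsage = size_t(2048u) * byte2mib;    // pre-pascal: use at most 2GiB
        if(smMajor == 2)
            maxMemUsage = size_t(1024u) * byte2mib;    // sm 20: use at most 1GiB

        // keep 64MiB free, plus 200 bytes of meta data (result nonce, ...)
        size_t availableMem  = freeMemory - (64u * byte2mib) - 200u;
        size_t limitedMemory = std::min(availableMem, maxMemUsage);

        // per hash: scratchpad + lmem/local memory + meta data
        size_t perThread = hashMemory + 740u + 680u;
        size_t threads   = (limitedMemory / perThread) / blocks;
        threads &= ~size_t(1u);                        // use only an even number of threads
        if(smMajor == 2 && threads > 64)
            threads = 64;                              // Fermi: 512 threads per block, kernel starts 4x
        return threads;
    }

The even rounding mirrors the `& 0xFFFFFFFE` mask in the patch, and the Fermi cap corresponds to the final `props.major == 2` branch.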