Skip to content
Snippets Groups Projects
Unverified Commit 98763bf0 authored by fireice-uk's avatar fireice-uk Committed by GitHub
Browse files

Merge pull request #1121 from psychocrypt/topic-speedupCUDAStartup

CUDA: reduce startup time
parents 266b2914 6488a026
No related branches found
No related tags found
No related merge requests found
......@@ -80,14 +80,22 @@ minethd::minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg)
ctx.syncMode = cfg.syncMode;
this->affinity = cfg.cpu_aff;
std::unique_lock<std::mutex> lck(thd_aff_set);
std::future<void> order_guard = order_fix.get_future();
std::future<void> numa_guard = numa_promise.get_future();
thread_work_guard = thread_work_promise.get_future();
oWorkThd = std::thread(&minethd::work_main, this);
order_guard.wait();
/* Wait until the gpu memory is initialized and numa cpu memory is pinned.
* The startup time is reduced if the memory is initialized in sequential order
* without concurrent threads (CUDA driver is less occupied).
*/
numa_guard.wait();
}
if(affinity >= 0) //-1 means no affinity
/* Let the worker thread leave its start-up barrier and, if requested,
 * pin it to the configured CPU.
 *
 * The worker thread blocks on thread_work_guard (the future obtained from
 * thread_work_promise) inside work_main(); fulfilling the promise here
 * releases it.
 */
void minethd::start_mining()
{
	// Unblock the worker thread waiting on thread_work_guard.
	thread_work_promise.set_value();

	// A negative value (-1) means no affinity was configured: nothing to pin.
	if(affinity < 0)
		return;

	// Pinning is best effort: a failure is only reported, never fatal.
	if(!cpu::minethd::thd_setaffinity(oWorkThd.native_handle(), affinity))
	{
		printer::inst()->print_msg(L1, "WARNING setting affinity failed.");
	}
}
......@@ -179,6 +187,11 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
}
for (i = 0; i < n; i++)
{
static_cast<minethd*>((*pvThreads)[i])->start_mining();
}
return pvThreads;
}
......@@ -208,10 +221,18 @@ void minethd::work_main()
if(affinity >= 0) //-1 means no affinity
bindMemoryToNUMANode(affinity);
order_fix.set_value();
std::unique_lock<std::mutex> lck(thd_aff_set);
lck.release();
if(cuda_get_deviceinfo(&ctx) != 0 || cryptonight_extra_cpu_init(&ctx) != 1)
{
printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", (int)iThreadNo);
std::exit(0);
}
// numa memory bind and gpu memory is initialized
numa_promise.set_value();
std::this_thread::yield();
// wait until all NVIDIA devices are initialized
thread_work_guard.wait();
uint64_t iCount = 0;
cryptonight_ctx* cpu_ctx;
......@@ -221,12 +242,6 @@ void minethd::work_main()
globalStates::inst().iConsumeCnt++;
if(cuda_get_deviceinfo(&ctx) != 0 || cryptonight_extra_cpu_init(&ctx) != 1)
{
printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", (int)iThreadNo);
std::exit(0);
}
bool mineMonero = strcmp_i(::jconf::inst()->GetCurrency(), "monero");
while (bQuit == 0)
......
......@@ -32,7 +32,8 @@ private:
typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*);
minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg);
void start_mining();
void work_main();
void consume_work();
......@@ -44,8 +45,11 @@ private:
static miner_work oGlobalWork;
miner_work oWork;
std::promise<void> order_fix;
std::mutex thd_aff_set;
std::promise<void> numa_promise;
std::promise<void> thread_work_promise;
// block thread until all NVIDIA GPUs are initialized
std::future<void> thread_work_guard;
std::thread oWorkThd;
int64_t affinity;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment