thread affinity for non cpu backends

This is a follow-up to #43 and uses the same mechanism to set the thread affinity for non-CPU backends correctly. - use the CPU affinity workflow for NVIDIA and AMD - cpu: move the thread-spawning messages before thread creation

thread affinity for non cpu backends
This is a follow-up to #43 and uses the same mechanism to set the thread affinity for non-CPU backends correctly. - use the CPU affinity workflow for NVIDIA and AMD - cpu: move the thread-spawning messages before thread creation
b685c90f · psychocrypt · 712f7b7b · b685c90f · b685c90f · b685c90f
Commit b685c90f authored 7 years ago by psychocrypt
--- a/xmrstak/backend/amd/minethd.cpp
+++ b/xmrstak/backend/amd/minethd.cpp
@@ -34,6 +34,7 @@
 #include "xmrstak/misc/executor.hpp"
 #include "xmrstak/misc/environment.hpp"
 #include "xmrstak/params.hpp"
+#include "xmrstak/backend/cpu/hwlocMemory.hpp"

 #include <assert.h>
 #include <cmath>
@@ -46,7 +47,7 @@ namespace xmrstak
 namespace amd
 {

-minethd::minethd(miner_work& pWork, size_t iNo, GpuContext* ctx)
+minethd::minethd(miner_work& pWork, size_t iNo, GpuContext* ctx, const jconf::thd_cfg cfg)
 {
 	oWork = pWork;
 	bQuit = 0;
@@ -55,8 +56,16 @@ minethd::minethd(miner_work& pWork, size_t iNo, GpuContext* ctx)
 	iHashCount = 0;
 	iTimestamp = 0;
 	pGpuCtx = ctx;
+	this->affinity = cfg.cpu_aff;

+	std::future<void> order_guard = order_fix.get_future();
+	
 	oWorkThd = std::thread(&minethd::work_main, this);
+
+	order_guard.wait();
+
+	if(!cpu::minethd::thd_setaffinity(oWorkThd.native_handle(), affinity))
+		printer::inst()->print_msg(L1, "WARNING setting affinity failed.");
 }

 extern "C"  {
@@ -122,21 +131,20 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
 	for (i = 0; i < n; i++)
 	{
 		jconf::inst()->GetThreadConfig(i, cfg);
-		minethd* thd = new minethd(pWork, i + threadOffset, &vGpuData[i]);
-
+		
 		if(cfg.cpu_aff >= 0)
 		{
 #if defined(__APPLE__)
 			printer::inst()->print_msg(L1, "WARNING on MacOS thread affinity is only advisory.");
 #endif
-			cpu::minethd::thd_setaffinity(thd->oWorkThd.native_handle(), cfg.cpu_aff);
+
+			printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, affinity: %d.", i, (int)cfg.cpu_aff);
 		}
+		else
+			printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, no affinity.", i);

+		minethd* thd = new minethd(pWork, i + threadOffset, &vGpuData[i], cfg);
 		pvThreads->push_back(thd);
-		if(cfg.cpu_aff >= 0)
-			printer::inst()->print_msg(L1, "Starting GPU thread, affinity: %d.", (int)cfg.cpu_aff);
-		else
-			printer::inst()->print_msg(L1, "Starting GPU thread, no affinity.");
 	}

 	return pvThreads;
@@ -166,6 +174,11 @@ void minethd::consume_work()

 void minethd::work_main()
 {
+	if(affinity >= 0) //-1 means no affinity
+		bindMemoryToNUMANode(affinity);
+
+	order_fix.set_value();
+	
 	uint64_t iCount = 0;
 	cryptonight_ctx* cpu_ctx;
 	cpu_ctx = cpu::minethd::minethd_alloc_ctx();

--- a/xmrstak/backend/amd/minethd.hpp
+++ b/xmrstak/backend/amd/minethd.hpp
@@ -9,6 +9,7 @@

 #include <thread>
 #include <atomic>
+#include <future>

 namespace xmrstak
 {
@@ -26,10 +27,9 @@ public:
 private:
 	typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*);
 	
-	minethd(miner_work& pWork, size_t iNo, GpuContext* ctx);
+	minethd(miner_work& pWork, size_t iNo, GpuContext* ctx, const jconf::thd_cfg cfg);
 	
 	void work_main();
-	void double_work_main();
 	void consume_work();

 	uint64_t iJobNo;
@@ -37,7 +37,10 @@ private:
 	static miner_work oGlobalWork;
 	miner_work oWork;

+	std::promise<void> order_fix;
+
 	std::thread oWorkThd;
+	int64_t affinity;

 	bool bQuit;
 	bool bNoPrefetch;

--- a/xmrstak/backend/cpu/minethd.cpp
+++ b/xmrstak/backend/cpu/minethd.cpp
@@ -259,10 +259,6 @@ std::vector<iBackend*> minethd::thread_starter(uint32_t threadOffset, miner_work
 	{
 		jconf::inst()->GetThreadConfig(i, cfg);

-		// \todo need thread offset
-		minethd* thd = new minethd(pWork, i + threadOffset, cfg.bDoubleMode, cfg.bNoPrefetch, cfg.iCpuAff);
-		pvThreads.push_back(thd);
-
 		if(cfg.iCpuAff >= 0)
 		{
 #if defined(__APPLE__)
@@ -273,8 +269,12 @@ std::vector<iBackend*> minethd::thread_starter(uint32_t threadOffset, miner_work
 		}
 		else
 			printer::inst()->print_msg(L1, "Starting %s thread, no affinity.", cfg.bDoubleMode ? "double" : "single");
+		
+		// \todo need thread offset
+		minethd* thd = new minethd(pWork, i + threadOffset, cfg.bDoubleMode, cfg.bNoPrefetch, cfg.iCpuAff);
+		pvThreads.push_back(thd);
 	}
-
+	
 	return pvThreads;
 }


--- a/xmrstak/backend/nvidia/minethd.cpp
+++ b/xmrstak/backend/nvidia/minethd.cpp
@@ -31,6 +31,7 @@
 #include "xmrstak/misc/executor.hpp"
 #include "xmrstak/jconf.hpp"
 #include "xmrstak/misc/environment.hpp"
+#include "xmrstak/backend/cpu/hwlocMemory.hpp"

 #include <assert.h>
 #include <cmath>
@@ -73,8 +74,16 @@ minethd::minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg)
 	ctx.device_threads = (int)cfg.threads;
 	ctx.device_bfactor = (int)cfg.bfactor;
 	ctx.device_bsleep = (int)cfg.bsleep;
+	this->affinity = cfg.cpu_aff;
+
+	std::future<void> order_guard = order_fix.get_future();
 	
 	oWorkThd = std::thread(&minethd::work_main, this);
+
+	order_guard.wait();
+
+	if(!cpu::minethd::thd_setaffinity(oWorkThd.native_handle(), affinity))
+		printer::inst()->print_msg(L1, "WARNING setting affinity failed.");
 }


@@ -147,22 +156,21 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
 	for (i = 0; i < n; i++)
 	{
 		jconf::inst()->GetGPUThreadConfig(i, cfg);
-		minethd* thd = new minethd(pWork, i + threadOffset, cfg);

 		if(cfg.cpu_aff >= 0)
 		{
 #if defined(__APPLE__)
 			printer::inst()->print_msg(L1, "WARNING on MacOS thread affinity is only advisory.");
 #endif
-			cpu::minethd::thd_setaffinity(thd->oWorkThd.native_handle(), cfg.cpu_aff);
-		}

+			printer::inst()->print_msg(L1, "Starting NVIDIA GPU thread %d, affinity: %d.", i, (int)cfg.cpu_aff);
+		}
+		else
+			printer::inst()->print_msg(L1, "Starting NVIDIA GPU thread %d, no affinity.", i);
+		
+		minethd* thd = new minethd(pWork, i + threadOffset, cfg);
 		pvThreads->push_back(thd);

-		if(cfg.cpu_aff >= 0)
-			printer::inst()->print_msg(L1, "Starting GPU thread, affinity: %d.", (int)cfg.cpu_aff);
-		else
-			printer::inst()->print_msg(L1, "Starting GPU thread, no affinity.");
 	}

 	return pvThreads;
@@ -191,6 +199,11 @@ void minethd::consume_work()

 void minethd::work_main()
 {
+	if(affinity >= 0) //-1 means no affinity
+		bindMemoryToNUMANode(affinity);
+
+	order_fix.set_value();
+	
 	uint64_t iCount = 0;
 	cryptonight_ctx* cpu_ctx;
 	cpu_ctx = cpu::minethd::minethd_alloc_ctx();

--- a/xmrstak/backend/nvidia/minethd.hpp
+++ b/xmrstak/backend/nvidia/minethd.hpp
@@ -12,6 +12,7 @@
 #include <thread>
 #include <atomic>
 #include <vector>
+#include <future>


 namespace xmrstak
@@ -43,7 +44,10 @@ private:
 	static miner_work oGlobalWork;
 	miner_work oWork;

+	std::promise<void> order_fix;
+
 	std::thread oWorkThd;
+	int64_t affinity;

 	nvid_ctx ctx;