diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp
index f7b47249eeb0bb64aa5d7429404ef8948d7a6271..d6051ffcdc740670bb01605c4950aaaae1155f3f 100644
--- a/xmrstak/backend/amd/minethd.cpp
+++ b/xmrstak/backend/amd/minethd.cpp
@@ -252,7 +252,7 @@ void minethd::work_main()
 
 				*(uint32_t*)(bWorkBlob + 39) = results[i];
 
-				hash_fun(bWorkBlob, oWork.iWorkSize, bResult, cpu_ctx);
+				hash_fun(bWorkBlob, oWork.iWorkSize, bResult, &cpu_ctx);
 				if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget)
 					executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult, iThreadNo, miner_algo), oWork.iPoolId));
 				else
diff --git a/xmrstak/backend/amd/minethd.hpp b/xmrstak/backend/amd/minethd.hpp
index 3142117c509b98c3858a82e136d93f229ea627b0..04c2ff8ad1a996d22fd8648953312bf6beaccdb7 100644
--- a/xmrstak/backend/amd/minethd.hpp
+++ b/xmrstak/backend/amd/minethd.hpp
@@ -24,7 +24,7 @@ public:
 	static bool init_gpus();
 
 private:
-	typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*);
+	typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx**);
 
 	minethd(miner_work& pWork, size_t iNo, GpuContext* ctx, const jconf::thd_cfg cfg);
 
diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
index 9f70bcfa709d2a8be889c30f3766a6a812114d82..89c508990f82a28f4d14fcb9aedbb83bdf4e52d4 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
+++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
@@ -151,15 +151,15 @@ static inline void soft_aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i
 
 inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3, __m128i& x4, __m128i& x5, __m128i& x6, __m128i& x7)
 {
-    __m128i tmp0 = x0;
-    x0 = _mm_xor_si128(x0, x1);
-    x1 = _mm_xor_si128(x1, x2);
-    x2 = _mm_xor_si128(x2, x3);
-    x3 = _mm_xor_si128(x3, x4);
-    x4 = _mm_xor_si128(x4, x5);
-    x5 = _mm_xor_si128(x5, x6);
-    x6 = _mm_xor_si128(x6, x7);
-    x7 = _mm_xor_si128(x7, tmp0);
+	__m128i tmp0 = x0; // XORs each register with its successor in place; x0 is saved because x7 needs its pre-update value
+	x0 = _mm_xor_si128(x0, x1);
+	x1 = _mm_xor_si128(x1, x2);
+	x2 = _mm_xor_si128(x2, x3);
+	x3 = _mm_xor_si128(x3, x4);
+	x4 = _mm_xor_si128(x4, x5);
+	x5 = _mm_xor_si128(x5, x6);
+	x6 = _mm_xor_si128(x6, x7);
+	x7 = _mm_xor_si128(x7, tmp0); // wrap-around: x7 is paired with the saved original x0
 }
 
 template<size_t MEM, bool SOFT_AES, bool PREFETCH, xmrstak_algo ALGO>
@@ -467,712 +467,325 @@ inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
 
 }
 
-template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
-void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_ctx* ctx0)
-{
-	constexpr size_t MASK = cn_select_mask<ALGO>();
-	constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
-	constexpr size_t MEM = cn_select_memory<ALGO>();
-
-	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) && len < 43)
-	{
-		memset(output, 0, 32);
-		return;
-	}
-
-	keccak((const uint8_t *)input, len, ctx0->hash_state, 200);
-
-	uint64_t monero_const;
-	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2)
-	{
-		monero_const  =  *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35);
-		monero_const ^=  *(reinterpret_cast<const uint64_t*>(ctx0->hash_state) + 24);
+#define CN_INIT_SINGLE /* input-length guard used at the top of the Cryptonight_hash<N>::hash bodies; expects ALGO, len, output and N in scope */ \
+	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) && len < 43) \
+	{ /* 43 = 35 + 8: CN_INIT reads a uint64_t at byte offset 35 of each input for these ALGOs */ \
+		memset(output, 0, 32 * N); /* zero 32 bytes of output per lane */ \
+		return; /* returns from the enclosing function, not just from the macro */ \
 	}
 
-	// Optim - 99% time boundary
-	cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx0->hash_state, (__m128i*)ctx0->long_state);
-
-	uint8_t* l0 = ctx0->long_state;
-	uint64_t* h0 = (uint64_t*)ctx0->hash_state;
-
-	uint64_t al0 = h0[0] ^ h0[4];
-	uint64_t ah0 = h0[1] ^ h0[5];
-	__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-
-	uint64_t idx0 = h0[0] ^ h0[4];
-
-	// Optim - 90% time boundary
-	for(size_t i = 0; i < ITERATIONS; i++)
-	{
-		__m128i cx;
-		cx = _mm_load_si128((__m128i *)&l0[idx0 & MASK]);
-
-		if (ALGO == cryptonight_bittube2)
-		{
-			cx = aes_round_bittube2(cx, _mm_set_epi64x(ah0, al0));
-		} 
-		else
-		{
-			if(SOFT_AES)
-				cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
-			else
-				cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
-		}
-
-		if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2)
-			cryptonight_monero_tweak<ALGO>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
-		else
-			_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
-
-		idx0 = _mm_cvtsi128_si64(cx);
-
-		if(PREFETCH)
-			_mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0);
-		bx0 = cx;
-
-		uint64_t hi, lo, cl, ch;
-		cl = ((uint64_t*)&l0[idx0 & MASK])[0];
-		ch = ((uint64_t*)&l0[idx0 & MASK])[1];
-
-		lo = _umul128(idx0, cl, &hi);
-
-		al0 += hi;
-		((uint64_t*)&l0[idx0 & MASK])[0] = al0;
-		al0 ^= cl;
-		if(PREFETCH)
-			_mm_prefetch((const char*)&l0[al0 & MASK], _MM_HINT_T0);
-		ah0 += lo;
-
-		if (ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) {
-			if (ALGO == cryptonight_ipbc || ALGO == cryptonight_bittube2)
-				((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const ^ ((uint64_t*)&l0[idx0 & MASK])[0];
-			else
-				((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const;
-		}
-		else
-			((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
-		ah0 ^= ch;
-
-		idx0 = al0;
+#define CN_INIT(n, monero_const, l0, ax0, bx0, idx0, ptr0) /* per-lane setup for lane n; declares the named locals in the enclosing scope */ \
+	keccak((const uint8_t *)input + len * n, len, ctx[n]->hash_state, 200); /* lane n hashes the n-th len-byte slice of input */ \
+	uint64_t monero_const; /* only assigned below, and only read by CN_STEP4, for the ALGOs in the following test */ \
+	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) \
+	{ \
+		monero_const =  *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + len * n + 35); /* 8 bytes at offset 35 of this lane's input */ \
+		monero_const ^=  *(reinterpret_cast<const uint64_t*>(ctx[n]->hash_state) + 24); /* uint64_t index 24 = byte offset 192 of hash_state */ \
+	} \
+	/* Optim - 99% time boundary */ \
+	cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[n]->hash_state, (__m128i*)ctx[n]->long_state); \
+	\
+	__m128i ax0; \
+	uint64_t idx0; \
+	__m128i bx0; \
+	uint8_t* l0 = ctx[n]->long_state; \
+	{ /* extra scope keeps h0 out of the enclosing function's namespace */ \
+		uint64_t* h0 = (uint64_t*)ctx[n]->hash_state; \
+		idx0 = h0[0] ^ h0[4]; /* first scratchpad index; also the low half of ax0 */ \
+		ax0 = _mm_set_epi64x(h0[1] ^ h0[5], idx0); \
+		bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); \
+	} \
+	__m128i *ptr0 /* declared without initialiser; the ';' after the macro invocation ends this declaration */
 
-		if(ALGO == cryptonight_heavy || ALGO == cryptonight_bittube2)
-		{
-			int64_t n  = ((int64_t*)&l0[idx0 & MASK])[0];
-			int32_t d  = ((int32_t*)&l0[idx0 & MASK])[2];
-			int64_t q = n / (d | 0x5);
 
-			((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
-			idx0 = d ^ q;
-		}
-		else if(ALGO == cryptonight_haven)
-		{
-			int64_t n  = ((int64_t*)&l0[idx0 & MASK])[0];
-			int32_t d  = ((int32_t*)&l0[idx0 & MASK])[2];
-			int64_t q = n / (d | 0x5);
-
-			((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
-			idx0 = (~d) ^ q;
-		}
+#define CN_STEP1(n, monero_const, l0, ax0, bx0, idx0, ptr0, cx) /* load the 16-byte scratchpad entry selected by idx0 and transform it with key ax0 */ \
+	__m128i cx; /* declared per loop iteration by the invoking loop body */ \
+	ptr0 = (__m128i *)&l0[idx0 & MASK]; /* MASK bounds the byte offset into l0 */ \
+	cx = _mm_load_si128(ptr0); \
+	if (ALGO == cryptonight_bittube2) \
+	{ \
+		cx = aes_round_bittube2(cx, ax0); \
+	} \
+	else \
+	{ \
+		if(SOFT_AES) /* compile-time template flag selects the software or the AES-NI path */ \
+			cx = soft_aesenc(cx, ax0); \
+		else \
+			cx = _mm_aesenc_si128(cx, ax0); \
 	}
 
-	// Optim - 90% time boundary
-	cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx0->long_state, (__m128i*)ctx0->hash_state);
-
-	// Optim - 99% time boundary
-
-	keccakf((uint64_t*)ctx0->hash_state, 24);
-	extra_hashes[ctx0->hash_state[0] & 3](ctx0->hash_state, 200, (char*)output);
-}
-
-// This lovely creation will do 2 cn hashes at a time. We have plenty of space on silicon
-// to fit temporary vars for two contexts. Function will read len*2 from input and write 64 bytes to output
-// We are still limited by L3 cache, so doubling will only work with CPUs where we have more than 2MB to core (Xeons)
-template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
-void cryptonight_double_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
-{
-	constexpr size_t MASK = cn_select_mask<ALGO>();
-	constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
-	constexpr size_t MEM = cn_select_memory<ALGO>();
-
-	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) && len < 43)
-	{
-		memset(output, 0, 64);
-		return;
-	}
+#define CN_STEP2(n, monero_const, l0, ax0, bx0, idx0, ptr0, cx) /* write bx0 ^ cx back to the entry read in CN_STEP1, then move to the entry selected by cx */ \
+	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) \
+		cryptonight_monero_tweak<ALGO>((uint64_t*)ptr0, _mm_xor_si128(bx0, cx)); /* these ALGOs store through the tweak helper instead of a plain store */ \
+	else \
+		_mm_store_si128((__m128i *)ptr0, _mm_xor_si128(bx0, cx)); \
+	idx0 = _mm_cvtsi128_si64(cx); /* next index = low 64 bits of cx */ \
+	\
+	ptr0 = (__m128i *)&l0[idx0 & MASK]; \
+	if(PREFETCH) \
+		_mm_prefetch((const char*)ptr0, _MM_HINT_T0); \
+	bx0 = cx; /* cx becomes bx0 for the next iteration; the trailing line-continuation only joins the blank line that follows */ \
+
+#define CN_STEP3(n, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0) /* multiply step: combine idx0 * cl into the two 64-bit halves of ax0 */ \
+	uint64_t lo, cl, ch; \
+	uint64_t al0 = _mm_cvtsi128_si64(ax0); /* low 64 bits of ax0 */ \
+	uint64_t ah0 = ((uint64_t*)&ax0)[1]; /* second 64-bit word of ax0 as laid out in memory */ \
+	cl = ((uint64_t*)ptr0)[0]; \
+	ch = ((uint64_t*)ptr0)[1]; \
+	\
+	{ \
+		uint64_t hi; /* written by _umul128 through its pointer argument; lo is its return value */ \
+		lo = _umul128(idx0, cl, &hi); \
+		ah0 += lo; \
+		al0 += hi; \
+	} \
+	((uint64_t*)ptr0)[0] = al0; /* first qword of the entry; CN_STEP4 writes the second */ \
+	if(PREFETCH) /* NOTE(review): ptr0 was just read and written above; the removed single-hash code prefetched l0[al0 & MASK] after al0 ^= cl - confirm this target is intended */ \
+		_mm_prefetch((const char*)ptr0, _MM_HINT_T0)
+	
 
-	keccak((const uint8_t *)input, len, ctx[0]->hash_state, 200);
-	keccak((const uint8_t *)input+len, len, ctx[1]->hash_state, 200);
+#define CN_STEP4(n, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0) /* store the second qword of the entry, then fold the old entry (cl, ch) into ax0 */ \
+	if (ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) \
+	{ \
+		if (ALGO == cryptonight_ipbc || ALGO == cryptonight_bittube2) \
+			((uint64_t*)ptr0)[1] = ah0 ^ monero_const ^ ((uint64_t*)ptr0)[0]; /* [0] already holds the al0 stored by CN_STEP3 */ \
+		else \
+			((uint64_t*)ptr0)[1] = ah0 ^ monero_const; \
+	} \
+	else \
+		((uint64_t*)ptr0)[1] = ah0; \
+	al0 ^= cl; /* cl and ch are the entry values read before it was overwritten */ \
+	ah0 ^= ch; \
+	ax0 = _mm_set_epi64x(ah0, al0); /* repack the scalar halves into the vector used as key by CN_STEP1 */ \
+	idx0 = al0; /* index for the next iteration (CN_STEP5 may replace it) */
 
-	uint64_t monero_const_0, monero_const_1;
-	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2)
-	{
-		monero_const_0  =  *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35);
-		monero_const_0 ^=  *(reinterpret_cast<const uint64_t*>(ctx[0]->hash_state) + 24);
-		monero_const_1  =  *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + len + 35);
-		monero_const_1 ^=  *(reinterpret_cast<const uint64_t*>(ctx[1]->hash_state) + 24);
+#define CN_STEP5(n, monero_const, l0, ax0, bx0, idx0, ptr0) /* extra division step; expands to nothing effective unless ALGO is heavy, bittube2 or haven */ \
+	if(ALGO == cryptonight_heavy || ALGO == cryptonight_bittube2) \
+	{ \
+		ptr0 = (__m128i *)&l0[idx0 & MASK]; \
+		int64_t u  = ((int64_t*)ptr0)[0]; /* first 8 bytes of the entry, signed */ \
+		int32_t d  = ((int32_t*)ptr0)[2]; /* int32 index 2 = bytes 8..11 of the entry */ \
+		int64_t q = u / (d | 0x5); /* OR-ing in 0x5 sets bit 0, so the divisor is never zero */ \
+		\
+		((int64_t*)ptr0)[0] = u ^ q; \
+		idx0 = d ^ q; \
+	} \
+	else if(ALGO == cryptonight_haven) \
+	{ \
+		ptr0 = (__m128i *)&l0[idx0 & MASK]; \
+		int64_t u  = ((int64_t*)ptr0)[0]; \
+		int32_t d  = ((int32_t*)ptr0)[2]; \
+		int64_t q = u / (d | 0x5); \
+		\
+		((int64_t*)ptr0)[0] = u ^ q; \
+		idx0 = (~d) ^ q; /* only difference from the branch above: d is complemented */ \
 	}
 
-	// Optim - 99% time boundary
-	cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[0]->hash_state, (__m128i*)ctx[0]->long_state);
-	cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[1]->hash_state, (__m128i*)ctx[1]->long_state);
-
-	uint8_t* l0 = ctx[0]->long_state;
-	uint64_t* h0 = (uint64_t*)ctx[0]->hash_state;
-	uint8_t* l1 = ctx[1]->long_state;
-	uint64_t* h1 = (uint64_t*)ctx[1]->hash_state;
+#define CN_FINALIZE(n) /* per-lane epilogue for lane n; the caller supplies the terminating ';' */ \
+	/* Optim - 90% time boundary */ \
+	cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[n]->long_state, (__m128i*)ctx[n]->hash_state); \
+	/* Optim - 99% time boundary; the low two bits of hash_state[0] pick the final hash, written at output + 32 * n */ \
+	keccakf((uint64_t*)ctx[n]->hash_state, 24); \
+	extra_hashes[ctx[n]->hash_state[0] & 3](ctx[n]->hash_state, 200, (char*)output + 32 * n)
 
-	uint64_t axl0 = h0[0] ^ h0[4];
-	uint64_t axh0 = h0[1] ^ h0[5];
-	__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-	uint64_t axl1 = h1[0] ^ h1[4];
-	uint64_t axh1 = h1[1] ^ h1[5];
-	__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
-
-	uint64_t idx0 = h0[0] ^ h0[4];
-	uint64_t idx1 = h1[0] ^ h1[4];
+//! defer the evaluation of a macro: CN_DEFER(f) expands to f (followed by an empty CN_EMPTY() expansion in the _MSC_VER branch)
+#ifndef _MSC_VER
+#	define CN_DEFER(...) __VA_ARGS__
+#else
+#	define CN_EMPTY(...)
+#	define CN_DEFER(...) __VA_ARGS__ CN_EMPTY()
+#endif
 
-	// Optim - 90% time boundary
-	for (size_t i = 0; i < ITERATIONS; i++)
+//! expand to an invocation of the macro f with the passed arguments; the name f is routed through CN_DEFER first
+#define CN_EXEC(f,...) CN_DEFER(f)(__VA_ARGS__)
+
+/** append n to every argument and keep n itself as the first argument
+ *
+ * @param n number which is appended to the arguments (except the first argument, n itself)
+ *
+ * @code{.cpp}
+ * CN_ENUM_2(1, foo, bar)
+ * // is transformed to
+ * 1, foo1, bar1
+ * @endcode
+ */
+#define CN_ENUM_0(n, ...) n
+#define CN_ENUM_1(n, x1) n, x1 ## n
+#define CN_ENUM_2(n, x1, x2) n, x1 ## n, x2 ## n
+#define CN_ENUM_3(n, x1, x2, x3) n, x1 ## n, x2 ## n, x3 ## n
+#define CN_ENUM_4(n, x1, x2, x3, x4) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n
+#define CN_ENUM_5(n, x1, x2, x3, x4, x5) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n
+#define CN_ENUM_6(n, x1, x2, x3, x4, x5, x6) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n
+#define CN_ENUM_7(n, x1, x2, x3, x4, x5, x6, x7) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n, x7 ## n
+#define CN_ENUM_8(n, x1, x2, x3, x4, x5, x6, x7, x8) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n, x7 ## n, x8 ## n
+#define CN_ENUM_9(n, x1, x2, x3, x4, x5, x6, x7, x8, x9) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n, x7 ## n, x8 ## n, x9 ## n
+#define CN_ENUM_10(n, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n, x7 ## n, x8 ## n, x9 ## n, x10 ## n
+#define CN_ENUM_11(n, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n, x7 ## n, x8 ## n, x9 ## n, x10 ## n, x11 ## n
+#define CN_ENUM_12(n, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n, x7 ## n, x8 ## n, x9 ## n, x10 ## n, x11 ## n, x12 ## n
+
+/** repeat a macro call multiple times (REPEAT_k invokes f once for each index 0 .. k-1)
+ *
+ * @param n number of arguments that follow f; selects which CN_ENUM_<n> decorates them
+ * @param f name of the macro which should be executed
+ * @param ... n parameter names; each one gets the repetition index appended
+ *
+ * @code{.cpp}
+ * REPEAT_2(2, f, foo, bar)
+ * // is transformed to
+ * f(0, foo0, bar0); f(1, foo1, bar1)
+ * @endcode
+ */
+#define REPEAT_1(n, f, ...) CN_EXEC(f, CN_ENUM_ ## n(0, __VA_ARGS__))
+#define REPEAT_2(n, f, ...) CN_EXEC(f, CN_ENUM_ ## n(0, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(1, __VA_ARGS__))
+#define REPEAT_3(n, f, ...) CN_EXEC(f, CN_ENUM_ ## n(0, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(1, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(2, __VA_ARGS__))
+#define REPEAT_4(n, f, ...) CN_EXEC(f, CN_ENUM_ ## n(0, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(1, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(2, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(3, __VA_ARGS__))
+#define REPEAT_5(n, f, ...) CN_EXEC(f, CN_ENUM_ ## n(0, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(1, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(2, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(3, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(4, __VA_ARGS__))
+
+template< size_t N>
+struct Cryptonight_hash; // primary template is only declared; the explicit specializations that follow provide hash()
+
+template< >
+struct Cryptonight_hash<1>
+{
+	static constexpr size_t N = 1;
+	
+	template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
+	static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
 	{
-		__m128i cx;
-		cx = _mm_load_si128((__m128i *)&l0[idx0 & MASK]);
-
-		if (ALGO == cryptonight_bittube2)
-		{
-			cx = aes_round_bittube2(cx, _mm_set_epi64x(axh0, axl0));
-		} 
-		else
-		{
-			if(SOFT_AES)
-				cx = soft_aesenc(cx, _mm_set_epi64x(axh0, axl0));
-			else
-				cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh0, axl0));
-		}
-
-		if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2)
-			cryptonight_monero_tweak<ALGO>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
-		else
-			_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
-
-		idx0 = _mm_cvtsi128_si64(cx);
-		bx0 = cx;
-
-		if(PREFETCH)
-			_mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0);
-
-		cx = _mm_load_si128((__m128i *)&l1[idx1 & MASK]);
-
-		if (ALGO == cryptonight_bittube2)
-		{
-			cx = aes_round_bittube2(cx, _mm_set_epi64x(axh1, axl1));
-		} 
-		else
-		{
-			if(SOFT_AES)
-				cx = soft_aesenc(cx, _mm_set_epi64x(axh1, axl1));
-			else
-				cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh1, axl1));
-		}
-
-		if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2)
-			cryptonight_monero_tweak<ALGO>((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx));
-		else
-			_mm_store_si128((__m128i *)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx));
-
-		idx1 = _mm_cvtsi128_si64(cx);
-		bx1 = cx;
-
-		if(PREFETCH)
-			_mm_prefetch((const char*)&l1[idx1 & MASK], _MM_HINT_T0);
-
-		uint64_t hi, lo, cl, ch;
-		cl = ((uint64_t*)&l0[idx0 & MASK])[0];
-		ch = ((uint64_t*)&l0[idx0 & MASK])[1];
-
-		lo = _umul128(idx0, cl, &hi);
-
-		axl0 += hi;
-		axh0 += lo;
-		((uint64_t*)&l0[idx0 & MASK])[0] = axl0;
-
-		if (ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) {
-			if (ALGO == cryptonight_ipbc || ALGO == cryptonight_bittube2)
-				((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0 ^ ((uint64_t*)&l0[idx0 & MASK])[0];
-			else
-				((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0;
-		} else
-			((uint64_t*)&l0[idx0 & MASK])[1] = axh0;
-
-		axh0 ^= ch;
-		axl0 ^= cl;
-		idx0 = axl0;
+		constexpr size_t MASK = cn_select_mask<ALGO>();
+		constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
+		constexpr size_t MEM = cn_select_memory<ALGO>();
 
-		if(ALGO == cryptonight_heavy || ALGO == cryptonight_bittube2)
-		{
-			int64_t n  = ((int64_t*)&l0[idx0 & MASK])[0];
-			int32_t d  = ((int32_t*)&l0[idx0 & MASK])[2];
-			int64_t q = n / (d | 0x5);
+		CN_INIT_SINGLE;
+		REPEAT_1(6, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0);
 
-			((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
-			idx0 = d ^ q;
-		}
-		else if(ALGO == cryptonight_haven)
+		// Optim - 90% time boundary
+		for(size_t i = 0; i < ITERATIONS; i++)
 		{
-			int64_t n  = ((int64_t*)&l0[idx0 & MASK])[0];
-			int32_t d  = ((int32_t*)&l0[idx0 & MASK])[2];
-			int64_t q = n / (d | 0x5);
 
-			((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
-			idx0 = (~d) ^ q;
+			REPEAT_1(7, CN_STEP1, monero_const, l0, ax0, bx0, idx0, ptr0, cx);
+			REPEAT_1(7, CN_STEP2, monero_const, l0, ax0, bx0, idx0, ptr0, cx);
+			REPEAT_1(11, CN_STEP3, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0);
+			REPEAT_1(11, CN_STEP4, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0);
+			REPEAT_1(6, CN_STEP5, monero_const, l0, ax0, bx0, idx0, ptr0);
 		}
 
-		if(PREFETCH)
-			_mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0);
-
-		cl = ((uint64_t*)&l1[idx1 & MASK])[0];
-		ch = ((uint64_t*)&l1[idx1 & MASK])[1];
-
-		lo = _umul128(idx1, cl, &hi);
-
-		axl1 += hi;
-		axh1 += lo;
-		((uint64_t*)&l1[idx1 & MASK])[0] = axl1;
+		REPEAT_1(0, CN_FINALIZE);
+	}
+};
 
-		if (ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_bittube2) {
-			if (ALGO == cryptonight_ipbc || ALGO == cryptonight_bittube2)
-				((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1 ^ ((uint64_t*)&l1[idx1 & MASK])[0];
-			else
-				((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1;
-		} else
-			((uint64_t*)&l1[idx1 & MASK])[1] = axh1;
+template< >
+struct Cryptonight_hash<2>
+{
+	static constexpr size_t N = 2;
 
-		axh1 ^= ch;
-		axl1 ^= cl;
-		idx1 = axl1;
+	template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
+	static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
+	{
+		constexpr size_t MASK = cn_select_mask<ALGO>();
+		constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
+		constexpr size_t MEM = cn_select_memory<ALGO>();
 
-		if(ALGO == cryptonight_heavy || ALGO == cryptonight_bittube2)
-		{
-			int64_t n  = ((int64_t*)&l1[idx1 & MASK])[0];
-			int32_t d  = ((int32_t*)&l1[idx1 & MASK])[2];
-			int64_t q = n / (d | 0x5);
+		CN_INIT_SINGLE;
+		REPEAT_2(6, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0);
 
-			((int64_t*)&l1[idx1 & MASK])[0] = n ^ q;
-			idx1 = d ^ q;
-		}
-		else if(ALGO == cryptonight_haven)
+		// Optim - 90% time boundary
+		for(size_t i = 0; i < ITERATIONS; i++)
 		{
-			int64_t n  = ((int64_t*)&l1[idx1 & MASK])[0];
-			int32_t d  = ((int32_t*)&l1[idx1 & MASK])[2];
-			int64_t q = n / (d | 0x5);
-
-			((int64_t*)&l1[idx1 & MASK])[0] = n ^ q;
-			idx1 = (~d) ^ q;
+			REPEAT_2(7, CN_STEP1, monero_const, l0, ax0, bx0, idx0, ptr0, cx);
+			REPEAT_2(7, CN_STEP2, monero_const, l0, ax0, bx0, idx0, ptr0, cx);
+			REPEAT_2(11, CN_STEP3, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0);
+			REPEAT_2(11, CN_STEP4, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0);
+			REPEAT_2(6, CN_STEP5, monero_const, l0, ax0, bx0, idx0, ptr0);
 		}
 
-		if(PREFETCH)
-			_mm_prefetch((const char*)&l1[idx1 & MASK], _MM_HINT_T0);
+		REPEAT_2(0, CN_FINALIZE);
 	}
+};
 
-	// Optim - 90% time boundary
-	cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[0]->long_state, (__m128i*)ctx[0]->hash_state);
-	cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[1]->long_state, (__m128i*)ctx[1]->hash_state);
-
-	// Optim - 99% time boundary
-
-	keccakf((uint64_t*)ctx[0]->hash_state, 24);
-	extra_hashes[ctx[0]->hash_state[0] & 3](ctx[0]->hash_state, 200, (char*)output);
-	keccakf((uint64_t*)ctx[1]->hash_state, 24);
-	extra_hashes[ctx[1]->hash_state[0] & 3](ctx[1]->hash_state, 200, (char*)output + 32);
-}
-
-#define CN_STEP1(a, b, c, l, ptr, idx)				\
-	ptr = (__m128i *)&l[idx & MASK];			\
-	if(PREFETCH)						\
-		_mm_prefetch((const char*)ptr, _MM_HINT_T0);	\
-	c = _mm_load_si128(ptr);
-
-#define CN_STEP2(a, b, c, l, ptr, idx)				\
-	if (ALGO == cryptonight_bittube2)	\
-	{	\
-		c = aes_round_bittube2(c, a);	\
-	}	\
-	else	\
-	{	\
-		if(SOFT_AES)					\
-			c = soft_aesenc(c, a);			\
-		else						\
-			c = _mm_aesenc_si128(c, a);		\
-	} 							\
-	b = _mm_xor_si128(b, c);				\
-	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) \
-		cryptonight_monero_tweak<ALGO>((uint64_t*)ptr, b); \
-	else \
-		_mm_store_si128(ptr, b);\
-
-#define CN_STEP3(a, b, c, l, ptr, idx)				\
-	idx = _mm_cvtsi128_si64(c);				\
-	ptr = (__m128i *)&l[idx & MASK];			\
-	if(PREFETCH)						\
-		_mm_prefetch((const char*)ptr, _MM_HINT_T0);	\
-	b = _mm_load_si128(ptr);
-
-#define CN_STEP4(a, b, c, l, mc, ptr, idx)				\
-	lo = _umul128(idx, _mm_cvtsi128_si64(b), &hi);		\
-	a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi));		\
-	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) \
-	{ \
-		_mm_store_si128(ptr, _mm_xor_si128(a, mc)); \
-		if (ALGO == cryptonight_ipbc || ALGO == cryptonight_bittube2) \
-			((uint64_t*)ptr)[1] ^= ((uint64_t*)ptr)[0];\
-	} \
-	else \
-		_mm_store_si128(ptr, a);\
-	a = _mm_xor_si128(a, b); \
-	idx = _mm_cvtsi128_si64(a);	\
-	if(ALGO == cryptonight_heavy || ALGO == cryptonight_bittube2) \
-	{ \
-		int64_t n  = ((int64_t*)&l[idx & MASK])[0]; \
-		int32_t d  = ((int32_t*)&l[idx & MASK])[2]; \
-		int64_t q = n / (d | 0x5); \
-		((int64_t*)&l[idx & MASK])[0] = n ^ q; \
-		idx = d ^ q; \
-	} \
-	else if(ALGO == cryptonight_haven) \
-	{ \
-		int64_t n  = ((int64_t*)&l[idx & MASK])[0]; \
-		int32_t d  = ((int32_t*)&l[idx & MASK])[2]; \
-		int64_t q = n / (d | 0x5); \
-		((int64_t*)&l[idx & MASK])[0] = n ^ q; \
-		idx = (~d) ^ q; \
-	}
-
-#define CONST_INIT(ctx, n) \
-	__m128i mc##n = _mm_set_epi64x(*reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + n * len + 35) ^ \
-	*(reinterpret_cast<const uint64_t*>((ctx)->hash_state) + 24), 0);
-
-// This lovelier creation will do 3 cn hashes at a time.
-template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
-void cryptonight_triple_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
+template< >
+struct Cryptonight_hash<3>
 {
-	constexpr size_t MASK = cn_select_mask<ALGO>();
-	constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
-	constexpr size_t MEM = cn_select_memory<ALGO>();
+	static constexpr size_t N = 3;
 
-	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) && len < 43)
+	template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
+	static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
 	{
-		memset(output, 0, 32 * 3);
-		return;
-	}
+		constexpr size_t MASK = cn_select_mask<ALGO>();
+		constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
+		constexpr size_t MEM = cn_select_memory<ALGO>();
 
-	for (size_t i = 0; i < 3; i++)
-	{
-		keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200);
-		cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
-	}
+		CN_INIT_SINGLE;
+		REPEAT_3(6, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0);
 
-	CONST_INIT(ctx[0], 0);
-	CONST_INIT(ctx[1], 1);
-	CONST_INIT(ctx[2], 2);
-
-	uint8_t* l0 = ctx[0]->long_state;
-	uint64_t* h0 = (uint64_t*)ctx[0]->hash_state;
-	uint8_t* l1 = ctx[1]->long_state;
-	uint64_t* h1 = (uint64_t*)ctx[1]->hash_state;
-	uint8_t* l2 = ctx[2]->long_state;
-	uint64_t* h2 = (uint64_t*)ctx[2]->hash_state;
-
-	__m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
-	__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-	__m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
-	__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
-	__m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
-	__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
-	__m128i cx0 = _mm_set_epi64x(0, 0);
-	__m128i cx1 = _mm_set_epi64x(0, 0);
-	__m128i cx2 = _mm_set_epi64x(0, 0);
-
-	uint64_t idx0, idx1, idx2;
-	idx0 = _mm_cvtsi128_si64(ax0);
-	idx1 = _mm_cvtsi128_si64(ax1);
-	idx2 = _mm_cvtsi128_si64(ax2);
-
-	for (size_t i = 0; i < ITERATIONS/2; i++)
-	{
-		uint64_t hi, lo;
-		__m128i *ptr0, *ptr1, *ptr2;
-
-		// EVEN ROUND
-		CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0);
-		CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1);
-		CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2);
-
-		CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0);
-		CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1);
-		CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2);
-
-		CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0);
-		CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
-		CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
-
-		CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
-		CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
-		CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
-
-		// ODD ROUND
-		CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
-		CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
-		CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
-
-		CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
-		CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
-		CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
-
-		CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
-		CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
-		CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
-
-		CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
-		CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
-		CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
-	}
+		// Optim - 90% time boundary
+		for(size_t i = 0; i < ITERATIONS; i++)
+		{
+			REPEAT_3(7, CN_STEP1, monero_const, l0, ax0, bx0, idx0, ptr0, cx);
+			REPEAT_3(7, CN_STEP2, monero_const, l0, ax0, bx0, idx0, ptr0, cx);
+			REPEAT_3(11, CN_STEP3, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0);
+			REPEAT_3(11, CN_STEP4, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0);
+			REPEAT_3(6, CN_STEP5, monero_const, l0, ax0, bx0, idx0, ptr0);
+		}
 
-	for (size_t i = 0; i < 3; i++)
-	{
-		cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
-		keccakf((uint64_t*)ctx[i]->hash_state, 24);
-		extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i);
+		REPEAT_3(0, CN_FINALIZE);
 	}
-}
+};
 
-// This even lovelier creation will do 4 cn hashes at a time.
-template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
-void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
+template< >
+struct Cryptonight_hash<4>
 {
-	constexpr size_t MASK = cn_select_mask<ALGO>();
-	constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
-	constexpr size_t MEM = cn_select_memory<ALGO>();
+	static constexpr size_t N = 4;
 
-	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) && len < 43)
+	template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
+	static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
 	{
-		memset(output, 0, 32 * 4);
-		return;
-	}
+		constexpr size_t MASK = cn_select_mask<ALGO>();
+		constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
+		constexpr size_t MEM = cn_select_memory<ALGO>();
 
-	for (size_t i = 0; i < 4; i++)
-	{
-		keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200);
-		cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
-	}
+		CN_INIT_SINGLE;
+		REPEAT_4(6, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0);
 
-	CONST_INIT(ctx[0], 0);
-	CONST_INIT(ctx[1], 1);
-	CONST_INIT(ctx[2], 2);
-	CONST_INIT(ctx[3], 3);
-
-	uint8_t* l0 = ctx[0]->long_state;
-	uint64_t* h0 = (uint64_t*)ctx[0]->hash_state;
-	uint8_t* l1 = ctx[1]->long_state;
-	uint64_t* h1 = (uint64_t*)ctx[1]->hash_state;
-	uint8_t* l2 = ctx[2]->long_state;
-	uint64_t* h2 = (uint64_t*)ctx[2]->hash_state;
-	uint8_t* l3 = ctx[3]->long_state;
-	uint64_t* h3 = (uint64_t*)ctx[3]->hash_state;
-
-	__m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
-	__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-	__m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
-	__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
-	__m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
-	__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
-	__m128i ax3 = _mm_set_epi64x(h3[1] ^ h3[5], h3[0] ^ h3[4]);
-	__m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
-	__m128i cx0 = _mm_set_epi64x(0, 0);
-	__m128i cx1 = _mm_set_epi64x(0, 0);
-	__m128i cx2 = _mm_set_epi64x(0, 0);
-	__m128i cx3 = _mm_set_epi64x(0, 0);
-
-	uint64_t idx0, idx1, idx2, idx3;
-	idx0 = _mm_cvtsi128_si64(ax0);
-	idx1 = _mm_cvtsi128_si64(ax1);
-	idx2 = _mm_cvtsi128_si64(ax2);
-	idx3 = _mm_cvtsi128_si64(ax3);
-
-	for (size_t i = 0; i < ITERATIONS/2; i++)
-	{
-		uint64_t hi, lo;
-		__m128i *ptr0, *ptr1, *ptr2, *ptr3;
-
-		// EVEN ROUND
-		CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0);
-		CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1);
-		CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2);
-		CN_STEP1(ax3, bx3, cx3, l3, ptr3, idx3);
-
-		CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0);
-		CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1);
-		CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2);
-		CN_STEP2(ax3, bx3, cx3, l3, ptr3, idx3);
-
-		CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0);
-		CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
-		CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
-		CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3);
-
-		CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
-		CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
-		CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
-		CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
-
-		// ODD ROUND
-		CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
-		CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
-		CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
-		CN_STEP1(ax3, cx3, bx3, l3, ptr3, idx3);
-
-		CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
-		CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
-		CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
-		CN_STEP2(ax3, cx3, bx3, l3, ptr3, idx3);
-
-		CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
-		CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
-		CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
-		CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3);
-
-		CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
-		CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
-		CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
-		CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
-	}
+		// Optimization marker ("90% time boundary"): presumably the main loop below dominates the hashing time
+		for(size_t i = 0; i < ITERATIONS; i++)
+		{
+			REPEAT_4(7, CN_STEP1, monero_const, l0, ax0, bx0, idx0, ptr0, cx);
+			REPEAT_4(7, CN_STEP2, monero_const, l0, ax0, bx0, idx0, ptr0, cx);
+			REPEAT_4(11, CN_STEP3, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0);
+			REPEAT_4(11, CN_STEP4, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0);
+			REPEAT_4(6, CN_STEP5, monero_const, l0, ax0, bx0, idx0, ptr0);
+		}
 
-	for (size_t i = 0; i < 4; i++)
-	{
-		cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
-		keccakf((uint64_t*)ctx[i]->hash_state, 24);
-		extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i);
+		REPEAT_4(0, CN_FINALIZE);
 	}
-}
+};
 
-// This most lovely creation will do 5 cn hashes at a time.
-template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
-void cryptonight_penta_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
+template< >
+struct Cryptonight_hash<5>
 {
-	constexpr size_t MASK = cn_select_mask<ALGO>();
-	constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
-	constexpr size_t MEM = cn_select_memory<ALGO>();
+	static constexpr size_t N = 5;
 
-	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) && len < 43)
+	template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
+	static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
 	{
-		memset(output, 0, 32 * 5);
-		return;
-	}
+		constexpr size_t MASK = cn_select_mask<ALGO>();
+		constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
+		constexpr size_t MEM = cn_select_memory<ALGO>();
 
-	for (size_t i = 0; i < 5; i++)
-	{
-		keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200);
-		cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
-	}
+		CN_INIT_SINGLE;
+		REPEAT_5(6, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0);
 
-	CONST_INIT(ctx[0], 0);
-	CONST_INIT(ctx[1], 1);
-	CONST_INIT(ctx[2], 2);
-	CONST_INIT(ctx[3], 3);
-	CONST_INIT(ctx[4], 4);
-
-	uint8_t* l0 = ctx[0]->long_state;
-	uint64_t* h0 = (uint64_t*)ctx[0]->hash_state;
-	uint8_t* l1 = ctx[1]->long_state;
-	uint64_t* h1 = (uint64_t*)ctx[1]->hash_state;
-	uint8_t* l2 = ctx[2]->long_state;
-	uint64_t* h2 = (uint64_t*)ctx[2]->hash_state;
-	uint8_t* l3 = ctx[3]->long_state;
-	uint64_t* h3 = (uint64_t*)ctx[3]->hash_state;
-	uint8_t* l4 = ctx[4]->long_state;
-	uint64_t* h4 = (uint64_t*)ctx[4]->hash_state;
-
-	__m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
-	__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-	__m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
-	__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
-	__m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
-	__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
-	__m128i ax3 = _mm_set_epi64x(h3[1] ^ h3[5], h3[0] ^ h3[4]);
-	__m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
-	__m128i ax4 = _mm_set_epi64x(h4[1] ^ h4[5], h4[0] ^ h4[4]);
-	__m128i bx4 = _mm_set_epi64x(h4[3] ^ h4[7], h4[2] ^ h4[6]);
-	__m128i cx0 = _mm_set_epi64x(0, 0);
-	__m128i cx1 = _mm_set_epi64x(0, 0);
-	__m128i cx2 = _mm_set_epi64x(0, 0);
-	__m128i cx3 = _mm_set_epi64x(0, 0);
-	__m128i cx4 = _mm_set_epi64x(0, 0);
-
-	uint64_t idx0, idx1, idx2, idx3, idx4;
-	idx0 = _mm_cvtsi128_si64(ax0);
-	idx1 = _mm_cvtsi128_si64(ax1);
-	idx2 = _mm_cvtsi128_si64(ax2);
-	idx3 = _mm_cvtsi128_si64(ax3);
-	idx4 = _mm_cvtsi128_si64(ax4);
-
-	for (size_t i = 0; i < ITERATIONS/2; i++)
-	{
-		uint64_t hi, lo;
-		__m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4;
-
-		// EVEN ROUND
-		CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0);
-		CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1);
-		CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2);
-		CN_STEP1(ax3, bx3, cx3, l3, ptr3, idx3);
-		CN_STEP1(ax4, bx4, cx4, l4, ptr4, idx4);
-
-		CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0);
-		CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1);
-		CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2);
-		CN_STEP2(ax3, bx3, cx3, l3, ptr3, idx3);
-		CN_STEP2(ax4, bx4, cx4, l4, ptr4, idx4);
-
-		CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0);
-		CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
-		CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
-		CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3);
-		CN_STEP3(ax4, bx4, cx4, l4, ptr4, idx4);
-
-		CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
-		CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
-		CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
-		CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
-		CN_STEP4(ax4, bx4, cx4, l4, mc4, ptr4, idx4);
-
-		// ODD ROUND
-		CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
-		CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
-		CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
-		CN_STEP1(ax3, cx3, bx3, l3, ptr3, idx3);
-		CN_STEP1(ax4, cx4, bx4, l4, ptr4, idx4);
-
-		CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
-		CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
-		CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
-		CN_STEP2(ax3, cx3, bx3, l3, ptr3, idx3);
-		CN_STEP2(ax4, cx4, bx4, l4, ptr4, idx4);
-
-		CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
-		CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
-		CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
-		CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3);
-		CN_STEP3(ax4, cx4, bx4, l4, ptr4, idx4);
-
-		CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
-		CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
-		CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
-		CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
-		CN_STEP4(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
-	}
+		// Optimization marker ("90% time boundary"): presumably the main loop below dominates the hashing time
+		for(size_t i = 0; i < ITERATIONS; i++)
+		{
+			REPEAT_5(7, CN_STEP1, monero_const, l0, ax0, bx0, idx0, ptr0, cx);
+			REPEAT_5(7, CN_STEP2, monero_const, l0, ax0, bx0, idx0, ptr0, cx);
+			REPEAT_5(11, CN_STEP3, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0);
+			REPEAT_5(11, CN_STEP4, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0);
+			REPEAT_5(6, CN_STEP5, monero_const, l0, ax0, bx0, idx0, ptr0);
+		}
 
-	for (size_t i = 0; i < 5; i++)
-	{
-		cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
-		keccakf((uint64_t*)ctx[i]->hash_state, 24);
-		extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i);
+		REPEAT_5(0, CN_FINALIZE);
 	}
-}
+};
diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp
index a8452ebb16ce56fe8b451b42ebad7e296b4e7455..93ce218a34f656cef896035799516b1d99b2c253 100644
--- a/xmrstak/backend/cpu/minethd.cpp
+++ b/xmrstak/backend/cpu/minethd.cpp
@@ -234,7 +234,7 @@ bool minethd::self_test()
 
 	unsigned char out[32 * MAX_N];
 	cn_hash_fun hashf;
-	cn_hash_fun_multi hashf_multi;
+	cn_hash_fun hashf_multi;
 
 	xmrstak_algo algo = xmrstak_algo::invalid_algo;
 
@@ -248,37 +248,37 @@ bool minethd::self_test()
 		if(algo == cryptonight)
 		{
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
-			hashf("This is a test", 14, out, ctx[0]);
+			hashf("This is a test", 14, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;
 
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight);
-			hashf("This is a test", 14, out, ctx[0]);
+			hashf("This is a test", 14, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;
 
-			hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
+			hashf_multi = func_multi_selector<2>(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
 			hashf_multi("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx);
 			bResult = bResult &&  memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
 					"\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0;
 
-			hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight);
+			hashf_multi = func_multi_selector<2>(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight);
 			hashf_multi("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx);
 			bResult = bResult &&  memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
 					"\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0;
 
-			hashf_multi = func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
+			hashf_multi = func_multi_selector<3>(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
 			hashf_multi("This is a testThis is a testThis is a test", 14, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
 					"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
 					"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 96) == 0;
 
-			hashf_multi = func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
+			hashf_multi = func_multi_selector<4>(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
 			hashf_multi("This is a testThis is a testThis is a testThis is a test", 14, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
 					"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
 					"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
 					"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 128) == 0;
 
-			hashf_multi = func_multi_selector(5, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
+			hashf_multi = func_multi_selector<5>(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
 			hashf_multi("This is a testThis is a testThis is a testThis is a testThis is a test", 14, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
 					"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
@@ -289,7 +289,7 @@ bool minethd::self_test()
 		else if(algo == cryptonight_lite)
 		{
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_lite);
-			hashf("This is a test This is a test This is a test", 44, out, ctx[0]);
+			hashf("This is a test This is a test This is a test", 44, out, ctx);
 			bResult = bResult &&  memcmp(out, "\x5a\x24\xa0\x29\xde\x1c\x39\x3f\x3d\x52\x7a\x2f\x9b\x39\xdc\x3d\xb3\xbc\x87\x11\x8b\x84\x52\x9b\x9f\x0\x88\x49\x25\x4b\x5\xce", 32) == 0;
 
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_lite);
@@ -298,71 +298,71 @@ bool minethd::self_test()
 		else if(algo == cryptonight_monero)
 		{
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_monero);
-			hashf("This is a test This is a test This is a test", 44, out, ctx[0]);
+			hashf("This is a test This is a test This is a test", 44, out, ctx);
 			bResult = bResult &&  memcmp(out, "\x1\x57\xc5\xee\x18\x8b\xbe\xc8\x97\x52\x85\xa3\x6\x4e\xe9\x20\x65\x21\x76\x72\xfd\x69\xa1\xae\xbd\x7\x66\xc7\xb5\x6e\xe0\xbd", 32) == 0;
 
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero);
-			hashf("This is a test This is a test This is a test", 44, out, ctx[0]);
+			hashf("This is a test This is a test This is a test", 44, out, ctx);
 			bResult = bResult &&  memcmp(out, "\x1\x57\xc5\xee\x18\x8b\xbe\xc8\x97\x52\x85\xa3\x6\x4e\xe9\x20\x65\x21\x76\x72\xfd\x69\xa1\xae\xbd\x7\x66\xc7\xb5\x6e\xe0\xbd", 32) == 0;
 		}
 		else if(algo == cryptonight_aeon)
 		{
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_aeon);
-			hashf("This is a test This is a test This is a test", 44, out, ctx[0]);
+			hashf("This is a test This is a test This is a test", 44, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xfc\xa1\x7d\x44\x37\x70\x9b\x4a\x3b\xd7\x1e\xf3\xed\x21\xb4\x17\xca\x93\xdc\x86\x79\xce\x81\xdf\xd3\xcb\xdd\xa\x22\xd7\x58\xba", 32) == 0;
 
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_aeon);
-			hashf("This is a test This is a test This is a test", 44, out, ctx[0]);
+			hashf("This is a test This is a test This is a test", 44, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xfc\xa1\x7d\x44\x37\x70\x9b\x4a\x3b\xd7\x1e\xf3\xed\x21\xb4\x17\xca\x93\xdc\x86\x79\xce\x81\xdf\xd3\xcb\xdd\xa\x22\xd7\x58\xba", 32) == 0;
 		}
 		else if(algo == cryptonight_ipbc)
 		{
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_ipbc);
-			hashf("This is a test This is a test This is a test", 44, out, ctx[0]);
+			hashf("This is a test This is a test This is a test", 44, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xbc\xe7\x48\xaf\xc5\x31\xff\xc9\x33\x7f\xcf\x51\x1b\xe3\x20\xa3\xaa\x8d\x4\x55\xf9\x14\x2a\x61\xe8\x38\xdf\xdc\x3b\x28\x3e\x0xb0", 32) == 0;
 
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_ipbc);
-			hashf("This is a test This is a test This is a test", 44, out, ctx[0]);
+			hashf("This is a test This is a test This is a test", 44, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xbc\xe7\x48\xaf\xc5\x31\xff\xc9\x33\x7f\xcf\x51\x1b\xe3\x20\xa3\xaa\x8d\x4\x55\xf9\x14\x2a\x61\xe8\x38\xdf\xdc\x3b\x28\x3e\x0", 32) == 0;
 		}
 		else if(algo == cryptonight_stellite)
 		{
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_stellite);
-			hashf("This is a test This is a test This is a test", 44, out, ctx[0]);
+			hashf("This is a test This is a test This is a test", 44, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xb9\x9d\x6c\xee\x50\x3c\x6f\xa6\x3f\x30\x69\x24\x4a\x0\x9f\xe4\xd4\x69\x3f\x68\x92\xa4\x5c\xc2\x51\xae\x46\x87\x7c\x6b\x98\xae", 32) == 0;
 
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_stellite);
-			hashf("This is a test This is a test This is a test", 44, out, ctx[0]);
+			hashf("This is a test This is a test This is a test", 44, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xb9\x9d\x6c\xee\x50\x3c\x6f\xa6\x3f\x30\x69\x24\x4a\x0\x9f\xe4\xd4\x69\x3f\x68\x92\xa4\x5c\xc2\x51\xae\x46\x87\x7c\x6b\x98\xae", 32) == 0;
 		}
 		else if(algo == cryptonight_masari)
 		{
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_masari);
-			hashf("This is a test This is a test This is a test", 44, out, ctx[0]);
+			hashf("This is a test This is a test This is a test", 44, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xbf\x5f\xd\xf3\x5a\x65\x7c\x89\xb0\x41\xcf\xf0\xd\x46\x6a\xb6\x30\xf9\x77\x7f\xd9\xc6\x3\xd7\x3b\xd8\xf1\xb5\x4b\x49\xed\x28", 32) == 0;
 
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_masari);
-			hashf("This is a test This is a test This is a test", 44, out, ctx[0]);
+			hashf("This is a test This is a test This is a test", 44, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xbf\x5f\xd\xf3\x5a\x65\x7c\x89\xb0\x41\xcf\xf0\xd\x46\x6a\xb6\x30\xf9\x77\x7f\xd9\xc6\x3\xd7\x3b\xd8\xf1\xb5\x4b\x49\xed\x28", 32) == 0;
 		}
 		else if(algo == cryptonight_heavy)
 		{
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_heavy);
-			hashf("This is a test This is a test This is a test", 44, out, ctx[0]);
+			hashf("This is a test This is a test This is a test", 44, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xf9\x44\x97\xce\xb4\xf0\xd9\x84\xb\x9b\xfc\x45\x94\x74\x55\x25\xcf\x26\x83\x16\x4f\xc\xf8\x2d\xf5\xf\x25\xff\x45\x28\x2e\x85", 32) == 0;
 
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_heavy);
-			hashf("This is a test This is a test This is a test", 44, out, ctx[0]);
+			hashf("This is a test This is a test This is a test", 44, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xf9\x44\x97\xce\xb4\xf0\xd9\x84\xb\x9b\xfc\x45\x94\x74\x55\x25\xcf\x26\x83\x16\x4f\xc\xf8\x2d\xf5\xf\x25\xff\x45\x28\x2e\x85", 32) == 0;
 		}
 		else if(algo == cryptonight_haven)
 		{
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_haven);
-			hashf("This is a test This is a test This is a test", 44, out, ctx[0]);
+			hashf("This is a test This is a test This is a test", 44, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xc7\xd4\x52\x9\x2b\x48\xa5\xaf\xae\x11\xaf\x40\x9a\x87\xe5\x88\xf0\x29\x35\xa3\x68\xd\xe3\x6b\xce\x43\xf6\xc8\xdf\xd3\xe3\x9", 32) == 0;
 
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_haven);
-			hashf("This is a test This is a test This is a test", 44, out, ctx[0]);
+			hashf("This is a test This is a test This is a test", 44, out, ctx);
 			bResult = bResult &&  memcmp(out, "\xc7\xd4\x52\x9\x2b\x48\xa5\xaf\xae\x11\xaf\x40\x9a\x87\xe5\x88\xf0\x29\x35\xa3\x68\xd\xe3\x6b\xce\x43\xf6\xc8\xdf\xd3\xe3\x9", 32) == 0;
 		}
 		else if(algo == cryptonight_bittube2)
@@ -372,13 +372,13 @@ bool minethd::self_test()
 
 			hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_bittube2);
 
-			hashf("\x38\x27\x4c\x97\xc4\x5a\x17\x2c\xfc\x97\x67\x98\x70\x42\x2e\x3a\x1a\xb0\x78\x49\x60\xc6\x05\x14\xd8\x16\x27\x14\x15\xc3\x06\xee\x3a\x3e\xd1\xa7\x7e\x31\xf6\xa8\x85\xc3\xcb\xff\x01\x02\x03\x04", 48, out, ctx[0]);
+			hashf("\x38\x27\x4c\x97\xc4\x5a\x17\x2c\xfc\x97\x67\x98\x70\x42\x2e\x3a\x1a\xb0\x78\x49\x60\xc6\x05\x14\xd8\x16\x27\x14\x15\xc3\x06\xee\x3a\x3e\xd1\xa7\x7e\x31\xf6\xa8\x85\xc3\xcb\xff\x01\x02\x03\x04", 48, out, ctx);
 			bResult = bResult &&  memcmp(out, "\x18\x2c\x30\x41\x93\x1a\x14\x73\xc6\xbf\x7e\x77\xfe\xb5\x17\x9b\xa8\xbe\xa9\x68\xba\x9e\xe1\xe8\x24\x1a\x12\x7a\xac\x81\xb4\x24", 32) == 0;
 
-			hashf("\x04\x04\xb4\x94\xce\xd9\x05\x18\xe7\x25\x5d\x01\x28\x63\xde\x8a\x4d\x27\x72\xb1\xff\x78\x8c\xd0\x56\x20\x38\x98\x3e\xd6\x8c\x94\xea\x00\xfe\x43\x66\x68\x83\x00\x00\x00\x00\x18\x7c\x2e\x0f\x66\xf5\x6b\xb9\xef\x67\xed\x35\x14\x5c\x69\xd4\x69\x0d\x1f\x98\x22\x44\x01\x2b\xea\x69\x6e\xe8\xb3\x3c\x42\x12\x01", 76, out, ctx[0]);
+			hashf("\x04\x04\xb4\x94\xce\xd9\x05\x18\xe7\x25\x5d\x01\x28\x63\xde\x8a\x4d\x27\x72\xb1\xff\x78\x8c\xd0\x56\x20\x38\x98\x3e\xd6\x8c\x94\xea\x00\xfe\x43\x66\x68\x83\x00\x00\x00\x00\x18\x7c\x2e\x0f\x66\xf5\x6b\xb9\xef\x67\xed\x35\x14\x5c\x69\xd4\x69\x0d\x1f\x98\x22\x44\x01\x2b\xea\x69\x6e\xe8\xb3\x3c\x42\x12\x01", 76, out, ctx);
 			bResult = bResult && memcmp(out, "\x7f\xbe\xb9\x92\x76\x87\x5a\x3c\x43\xc2\xbe\x5a\x73\x36\x06\xb5\xdc\x79\xcc\x9c\xf3\x7c\x43\x3e\xb4\x18\x56\x17\xfb\x9b\xc9\x36", 32) == 0;
 
-			hashf("\x85\x19\xe0\x39\x17\x2b\x0d\x70\xe5\xca\x7b\x33\x83\xd6\xb3\x16\x73\x15\xa4\x22\x74\x7b\x73\xf0\x19\xcf\x95\x28\xf0\xfd\xe3\x41\xfd\x0f\x2a\x63\x03\x0b\xa6\x45\x05\x25\xcf\x6d\xe3\x18\x37\x66\x9a\xf6\xf1\xdf\x81\x31\xfa\xf5\x0a\xaa\xb8\xd3\xa7\x40\x55\x89", 64, out, ctx[0]);
+			hashf("\x85\x19\xe0\x39\x17\x2b\x0d\x70\xe5\xca\x7b\x33\x83\xd6\xb3\x16\x73\x15\xa4\x22\x74\x7b\x73\xf0\x19\xcf\x95\x28\xf0\xfd\xe3\x41\xfd\x0f\x2a\x63\x03\x0b\xa6\x45\x05\x25\xcf\x6d\xe3\x18\x37\x66\x9a\xf6\xf1\xdf\x81\x31\xfa\xf5\x0a\xaa\xb8\xd3\xa7\x40\x55\x89", 64, out, ctx);
 			bResult = bResult && memcmp(out, "\x90\xdc\x65\x53\x8d\xb0\x00\xea\xa2\x52\xcd\xd4\x1c\x17\x7a\x64\xfe\xff\x95\x36\xe7\x71\x68\x35\xd4\xcf\x5c\x73\x56\xb1\x2f\xcd", 32) == 0;
 		}
 
@@ -438,8 +438,10 @@ std::vector<iBackend*> minethd::thread_starter(uint32_t threadOffset, miner_work
 	return pvThreads;
 }
 
-minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo)
+template<size_t N>
+minethd::cn_hash_fun minethd::func_multi_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo)
 {
+	static_assert(N >= 1, "number of threads must be >= 1" );
 	// We have two independent flag bits in the functions
 	// therefore we will build a binary digit and select the
 	// function as a two digit binary
@@ -483,46 +485,55 @@ minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmr
 	}
 
 	static const cn_hash_fun func_table[] = {
-		cryptonight_hash<cryptonight_monero, false, false>,
-		cryptonight_hash<cryptonight_monero, true, false>,
-		cryptonight_hash<cryptonight_monero, false, true>,
-		cryptonight_hash<cryptonight_monero, true, true>,
-		cryptonight_hash<cryptonight_lite, false, false>,
-		cryptonight_hash<cryptonight_lite, true, false>,
-		cryptonight_hash<cryptonight_lite, false, true>,
-		cryptonight_hash<cryptonight_lite, true, true>,
-		cryptonight_hash<cryptonight, false, false>,
-		cryptonight_hash<cryptonight, true, false>,
-		cryptonight_hash<cryptonight, false, true>,
-		cryptonight_hash<cryptonight, true, true>,
-		cryptonight_hash<cryptonight_heavy, false, false>,
-		cryptonight_hash<cryptonight_heavy, true, false>,
-		cryptonight_hash<cryptonight_heavy, false, true>,
-		cryptonight_hash<cryptonight_heavy, true, true>,
-		cryptonight_hash<cryptonight_aeon, false, false>,
-		cryptonight_hash<cryptonight_aeon, true, false>,
-		cryptonight_hash<cryptonight_aeon, false, true>,
-		cryptonight_hash<cryptonight_aeon, true, true>,
-		cryptonight_hash<cryptonight_ipbc, false, false>,
-		cryptonight_hash<cryptonight_ipbc, true, false>,
-		cryptonight_hash<cryptonight_ipbc, false, true>,
-		cryptonight_hash<cryptonight_ipbc, true, true>,
-		cryptonight_hash<cryptonight_stellite, false, false>,
-		cryptonight_hash<cryptonight_stellite, true, false>,
-		cryptonight_hash<cryptonight_stellite, false, true>,
-		cryptonight_hash<cryptonight_stellite, true, true>,
-		cryptonight_hash<cryptonight_masari, false, false>,
-		cryptonight_hash<cryptonight_masari, true, false>,
-		cryptonight_hash<cryptonight_masari, false, true>,
-		cryptonight_hash<cryptonight_masari, true, true>,
-		cryptonight_hash<cryptonight_haven, false, false>,
-		cryptonight_hash<cryptonight_haven, true, false>,
-		cryptonight_hash<cryptonight_haven, false, true>,
-		cryptonight_hash<cryptonight_haven, true, true>,
-		cryptonight_hash<cryptonight_bittube2, false, false>,
-		cryptonight_hash<cryptonight_bittube2, true, false>,
-		cryptonight_hash<cryptonight_bittube2, false, true>,
-		cryptonight_hash<cryptonight_bittube2, true, true>
+		Cryptonight_hash<N>::template hash<cryptonight_monero, false, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_monero, true, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_monero, false, true>,
+		Cryptonight_hash<N>::template hash<cryptonight_monero, true, true>,
+
+		Cryptonight_hash<N>::template hash<cryptonight_lite, false, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_lite, true, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_lite, false, true>,
+		Cryptonight_hash<N>::template hash<cryptonight_lite, true, true>,
+
+		Cryptonight_hash<N>::template hash<cryptonight, false, false>,
+		Cryptonight_hash<N>::template hash<cryptonight, true, false>,
+		Cryptonight_hash<N>::template hash<cryptonight, false, true>,
+		Cryptonight_hash<N>::template hash<cryptonight, true, true>,
+
+		Cryptonight_hash<N>::template hash<cryptonight_heavy, false, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_heavy, true, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_heavy, false, true>,
+		Cryptonight_hash<N>::template hash<cryptonight_heavy, true, true>,
+
+		Cryptonight_hash<N>::template hash<cryptonight_aeon, false, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_aeon, true, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_aeon, false, true>,
+		Cryptonight_hash<N>::template hash<cryptonight_aeon, true, true>,
+
+		Cryptonight_hash<N>::template hash<cryptonight_ipbc, false, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_ipbc, true, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_ipbc, false, true>,
+		Cryptonight_hash<N>::template hash<cryptonight_ipbc, true, true>,
+
+		Cryptonight_hash<N>::template hash<cryptonight_stellite, false, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_stellite, true, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_stellite, false, true>,
+		Cryptonight_hash<N>::template hash<cryptonight_stellite, true, true>,
+
+		Cryptonight_hash<N>::template hash<cryptonight_masari, false, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_masari, true, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_masari, false, true>,
+		Cryptonight_hash<N>::template hash<cryptonight_masari, true, true>,
+
+		Cryptonight_hash<N>::template hash<cryptonight_haven, false, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_haven, true, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_haven, false, true>,
+		Cryptonight_hash<N>::template hash<cryptonight_haven, true, true>,
+
+		Cryptonight_hash<N>::template hash<cryptonight_bittube2, false, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_bittube2, true, false>,
+		Cryptonight_hash<N>::template hash<cryptonight_bittube2, false, true>,
+		Cryptonight_hash<N>::template hash<cryptonight_bittube2, true, true>
 	};
 
 	std::bitset<2> digit;
@@ -532,333 +543,14 @@ minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmr
 	return func_table[ algv << 2 | digit.to_ulong() ];
 }
 
-void minethd::work_main()
+minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo)
 {
-	if(affinity >= 0) //-1 means no affinity
-		bindMemoryToNUMANode(affinity);
-
-	order_fix.set_value();
-	std::unique_lock<std::mutex> lck(thd_aff_set);
-	lck.release();
-	std::this_thread::yield();
-
-	cryptonight_ctx* ctx;
-	uint64_t iCount = 0;
-	uint64_t* piHashVal;
-	uint32_t* piNonce;
-	job_result result;
-
-	// start with root algorithm and switch later if fork version is reached
-	auto miner_algo = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot();
-	cn_hash_fun hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo);
-	ctx = minethd_alloc_ctx();
-
-	piHashVal = (uint64_t*)(result.bResult + 24);
-	piNonce = (uint32_t*)(oWork.bWorkBlob + 39);
-	result.iThreadId = iThreadNo;
-
-	uint8_t version = 0;
-	size_t lastPoolId = 0;
-
-	while (bQuit == 0)
-	{
-		if (oWork.bStall)
-		{
-			/* We are stalled here because the executor didn't find a job for us yet,
-			 * either because of network latency, or a socket problem. Since we are
-			 * raison d'etre of this software it us sensible to just wait until we have something
-			 */
-
-			while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
-				std::this_thread::sleep_for(std::chrono::milliseconds(100));
-
-			globalStates::inst().consume_work(oWork, iJobNo);
-			continue;
-		}
-
-		size_t nonce_ctr = 0;
-		constexpr size_t nonce_chunk = 4096; // Needs to be a power of 2
-
-		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
-		memcpy(result.sJobID, oWork.sJobID, sizeof(job_result::sJobID));
-
-		if(oWork.bNiceHash)
-			result.iNonce = *piNonce;
-
-		uint8_t new_version = oWork.getVersion();
-		if(new_version != version || oWork.iPoolId != lastPoolId)
-		{
-			coinDescription coinDesc = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(oWork.iPoolId);
-			if(new_version >= coinDesc.GetMiningForkVersion())
-			{
-				miner_algo = coinDesc.GetMiningAlgo();
-				hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo);
-			}
-			else
-			{
-				miner_algo = coinDesc.GetMiningAlgoRoot();
-				hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo);
-			}
-			result.algorithm = miner_algo;
-			lastPoolId = oWork.iPoolId;
-			version = new_version;
-		}
-
-		while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
-		{
-			if ((iCount++ & 0xF) == 0) //Store stats every 16 hashes
-			{
-				uint64_t iStamp = get_timestamp_ms();
-				iHashCount.store(iCount, std::memory_order_relaxed);
-				iTimestamp.store(iStamp, std::memory_order_relaxed);
-			}
-
-			if((nonce_ctr++ & (nonce_chunk-1)) == 0)
-			{
-				globalStates::inst().calc_start_nonce(result.iNonce, oWork.bNiceHash, nonce_chunk);
-				// check if the job is still valid, there is a small posibility that the job is switched
-				if(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) != iJobNo)
-					break;
-			}
-
-			*piNonce = result.iNonce;
-
-			hash_fun(oWork.bWorkBlob, oWork.iWorkSize, result.bResult, ctx);
-
-			if (*piHashVal < oWork.iTarget)
-				executor::inst()->push_event(ex_event(result, oWork.iPoolId));
-			result.iNonce++;
-
-			std::this_thread::yield();
-		}
-
-		globalStates::inst().consume_work(oWork, iJobNo);
-	}
-
-	cryptonight_free_ctx(ctx);
+	return func_multi_selector<1>(bHaveAes, bNoPrefetch, algo); // forwards all three arguments unchanged to the N = 1 instantiation of the templated selector
 }
 
-minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo)
+void minethd::work_main()
 {
-	// We have two independent flag bits in the functions
-	// therefore we will build a binary digit and select the
-	// function as a two digit binary
-
-	uint8_t algv;
-	switch(algo)
-	{
-	case cryptonight:
-		algv = 2;
-		break;
-	case cryptonight_lite:
-		algv = 1;
-		break;
-	case cryptonight_monero:
-		algv = 0;
-		break;
-	case cryptonight_heavy:
-		algv = 3;
-		break;
-	case cryptonight_aeon:
-		algv = 4;
-		break;
-	case cryptonight_ipbc:
-		algv = 5;
-		break;
-	case cryptonight_stellite:
-		algv = 6;
-		break;
-	case cryptonight_masari:
-		algv = 7;
-		break;
-	case cryptonight_haven:
-		algv = 8;
-		break;
-	case cryptonight_bittube2:
-		algv = 9;
-		break;
-	default:
-		algv = 2;
-		break;
-	}
-
-	static const cn_hash_fun_multi func_table[] = {
-		cryptonight_double_hash<cryptonight_monero, false, false>,
-		cryptonight_double_hash<cryptonight_monero, true, false>,
-		cryptonight_double_hash<cryptonight_monero, false, true>,
-		cryptonight_double_hash<cryptonight_monero, true, true>,
-		cryptonight_triple_hash<cryptonight_monero, false, false>,
-		cryptonight_triple_hash<cryptonight_monero, true, false>,
-		cryptonight_triple_hash<cryptonight_monero, false, true>,
-		cryptonight_triple_hash<cryptonight_monero, true, true>,
-		cryptonight_quad_hash<cryptonight_monero, false, false>,
-		cryptonight_quad_hash<cryptonight_monero, true, false>,
-		cryptonight_quad_hash<cryptonight_monero, false, true>,
-		cryptonight_quad_hash<cryptonight_monero, true, true>,
-		cryptonight_penta_hash<cryptonight_monero, false, false>,
-		cryptonight_penta_hash<cryptonight_monero, true, false>,
-		cryptonight_penta_hash<cryptonight_monero, false, true>,
-		cryptonight_penta_hash<cryptonight_monero, true, true>,
-
-		cryptonight_double_hash<cryptonight_lite, false, false>,
-		cryptonight_double_hash<cryptonight_lite, true, false>,
-		cryptonight_double_hash<cryptonight_lite, false, true>,
-		cryptonight_double_hash<cryptonight_lite, true, true>,
-		cryptonight_triple_hash<cryptonight_lite, false, false>,
-		cryptonight_triple_hash<cryptonight_lite, true, false>,
-		cryptonight_triple_hash<cryptonight_lite, false, true>,
-		cryptonight_triple_hash<cryptonight_lite, true, true>,
-		cryptonight_quad_hash<cryptonight_lite, false, false>,
-		cryptonight_quad_hash<cryptonight_lite, true, false>,
-		cryptonight_quad_hash<cryptonight_lite, false, true>,
-		cryptonight_quad_hash<cryptonight_lite, true, true>,
-		cryptonight_penta_hash<cryptonight_lite, false, false>,
-		cryptonight_penta_hash<cryptonight_lite, true, false>,
-		cryptonight_penta_hash<cryptonight_lite, false, true>,
-		cryptonight_penta_hash<cryptonight_lite, true, true>,
-
-		cryptonight_double_hash<cryptonight, false, false>,
-		cryptonight_double_hash<cryptonight, true, false>,
-		cryptonight_double_hash<cryptonight, false, true>,
-		cryptonight_double_hash<cryptonight, true, true>,
-		cryptonight_triple_hash<cryptonight, false, false>,
-		cryptonight_triple_hash<cryptonight, true, false>,
-		cryptonight_triple_hash<cryptonight, false, true>,
-		cryptonight_triple_hash<cryptonight, true, true>,
-		cryptonight_quad_hash<cryptonight, false, false>,
-		cryptonight_quad_hash<cryptonight, true, false>,
-		cryptonight_quad_hash<cryptonight, false, true>,
-		cryptonight_quad_hash<cryptonight, true, true>,
-		cryptonight_penta_hash<cryptonight, false, false>,
-		cryptonight_penta_hash<cryptonight, true, false>,
-		cryptonight_penta_hash<cryptonight, false, true>,
-		cryptonight_penta_hash<cryptonight, true, true>,
-
-		cryptonight_double_hash<cryptonight_heavy, false, false>,
-		cryptonight_double_hash<cryptonight_heavy, true, false>,
-		cryptonight_double_hash<cryptonight_heavy, false, true>,
-		cryptonight_double_hash<cryptonight_heavy, true, true>,
-		cryptonight_triple_hash<cryptonight_heavy, false, false>,
-		cryptonight_triple_hash<cryptonight_heavy, true, false>,
-		cryptonight_triple_hash<cryptonight_heavy, false, true>,
-		cryptonight_triple_hash<cryptonight_heavy, true, true>,
-		cryptonight_quad_hash<cryptonight_heavy, false, false>,
-		cryptonight_quad_hash<cryptonight_heavy, true, false>,
-		cryptonight_quad_hash<cryptonight_heavy, false, true>,
-		cryptonight_quad_hash<cryptonight_heavy, true, true>,
-		cryptonight_penta_hash<cryptonight_heavy, false, false>,
-		cryptonight_penta_hash<cryptonight_heavy, true, false>,
-		cryptonight_penta_hash<cryptonight_heavy, false, true>,
-		cryptonight_penta_hash<cryptonight_heavy, true, true>,
-
-		cryptonight_double_hash<cryptonight_aeon, false, false>,
-		cryptonight_double_hash<cryptonight_aeon, true, false>,
-		cryptonight_double_hash<cryptonight_aeon, false, true>,
-		cryptonight_double_hash<cryptonight_aeon, true, true>,
-		cryptonight_triple_hash<cryptonight_aeon, false, false>,
-		cryptonight_triple_hash<cryptonight_aeon, true, false>,
-		cryptonight_triple_hash<cryptonight_aeon, false, true>,
-		cryptonight_triple_hash<cryptonight_aeon, true, true>,
-		cryptonight_quad_hash<cryptonight_aeon, false, false>,
-		cryptonight_quad_hash<cryptonight_aeon, true, false>,
-		cryptonight_quad_hash<cryptonight_aeon, false, true>,
-		cryptonight_quad_hash<cryptonight_aeon, true, true>,
-		cryptonight_penta_hash<cryptonight_aeon, false, false>,
-		cryptonight_penta_hash<cryptonight_aeon, true, false>,
-		cryptonight_penta_hash<cryptonight_aeon, false, true>,
-		cryptonight_penta_hash<cryptonight_aeon, true, true>,
-
-		cryptonight_double_hash<cryptonight_ipbc, false, false>,
-		cryptonight_double_hash<cryptonight_ipbc, true, false>,
-		cryptonight_double_hash<cryptonight_ipbc, false, true>,
-		cryptonight_double_hash<cryptonight_ipbc, true, true>,
-		cryptonight_triple_hash<cryptonight_ipbc, false, false>,
-		cryptonight_triple_hash<cryptonight_ipbc, true, false>,
-		cryptonight_triple_hash<cryptonight_ipbc, false, true>,
-		cryptonight_triple_hash<cryptonight_ipbc, true, true>,
-		cryptonight_quad_hash<cryptonight_ipbc, false, false>,
-		cryptonight_quad_hash<cryptonight_ipbc, true, false>,
-		cryptonight_quad_hash<cryptonight_ipbc, false, true>,
-		cryptonight_quad_hash<cryptonight_ipbc, true, true>,
-		cryptonight_penta_hash<cryptonight_ipbc, false, false>,
-		cryptonight_penta_hash<cryptonight_ipbc, true, false>,
-		cryptonight_penta_hash<cryptonight_ipbc, false, true>,
-		cryptonight_penta_hash<cryptonight_ipbc, true, true>,
-
-		cryptonight_double_hash<cryptonight_stellite, false, false>,
-		cryptonight_double_hash<cryptonight_stellite, true, false>,
-		cryptonight_double_hash<cryptonight_stellite, false, true>,
-		cryptonight_double_hash<cryptonight_stellite, true, true>,
-		cryptonight_triple_hash<cryptonight_stellite, false, false>,
-		cryptonight_triple_hash<cryptonight_stellite, true, false>,
-		cryptonight_triple_hash<cryptonight_stellite, false, true>,
-		cryptonight_triple_hash<cryptonight_stellite, true, true>,
-		cryptonight_quad_hash<cryptonight_stellite, false, false>,
-		cryptonight_quad_hash<cryptonight_stellite, true, false>,
-		cryptonight_quad_hash<cryptonight_stellite, false, true>,
-		cryptonight_quad_hash<cryptonight_stellite, true, true>,
-		cryptonight_penta_hash<cryptonight_stellite, false, false>,
-		cryptonight_penta_hash<cryptonight_stellite, true, false>,
-		cryptonight_penta_hash<cryptonight_stellite, false, true>,
-		cryptonight_penta_hash<cryptonight_stellite, true, true>,
-
-		cryptonight_double_hash<cryptonight_masari, false, false>,
-		cryptonight_double_hash<cryptonight_masari, true, false>,
-		cryptonight_double_hash<cryptonight_masari, false, true>,
-		cryptonight_double_hash<cryptonight_masari, true, true>,
-		cryptonight_triple_hash<cryptonight_masari, false, false>,
-		cryptonight_triple_hash<cryptonight_masari, true, false>,
-		cryptonight_triple_hash<cryptonight_masari, false, true>,
-		cryptonight_triple_hash<cryptonight_masari, true, true>,
-		cryptonight_quad_hash<cryptonight_masari, false, false>,
-		cryptonight_quad_hash<cryptonight_masari, true, false>,
-		cryptonight_quad_hash<cryptonight_masari, false, true>,
-		cryptonight_quad_hash<cryptonight_masari, true, true>,
-		cryptonight_penta_hash<cryptonight_masari, false, false>,
-		cryptonight_penta_hash<cryptonight_masari, true, false>,
-		cryptonight_penta_hash<cryptonight_masari, false, true>,
-		cryptonight_penta_hash<cryptonight_masari, true, true>,
-		
-		cryptonight_double_hash<cryptonight_haven, false, false>,
-		cryptonight_double_hash<cryptonight_haven, true, false>,
-		cryptonight_double_hash<cryptonight_haven, false, true>,
-		cryptonight_double_hash<cryptonight_haven, true, true>,
-		cryptonight_triple_hash<cryptonight_haven, false, false>,
-		cryptonight_triple_hash<cryptonight_haven, true, false>,
-		cryptonight_triple_hash<cryptonight_haven, false, true>,
-		cryptonight_triple_hash<cryptonight_haven, true, true>,
-		cryptonight_quad_hash<cryptonight_haven, false, false>,
-		cryptonight_quad_hash<cryptonight_haven, true, false>,
-		cryptonight_quad_hash<cryptonight_haven, false, true>,
-		cryptonight_quad_hash<cryptonight_haven, true, true>,
-		cryptonight_penta_hash<cryptonight_haven, false, false>,
-		cryptonight_penta_hash<cryptonight_haven, true, false>,
-		cryptonight_penta_hash<cryptonight_haven, false, true>,
-		cryptonight_penta_hash<cryptonight_haven, true, true>,
-
-		cryptonight_double_hash<cryptonight_bittube2, false, false>,
-		cryptonight_double_hash<cryptonight_bittube2, true, false>,
-		cryptonight_double_hash<cryptonight_bittube2, false, true>,
-		cryptonight_double_hash<cryptonight_bittube2, true, true>,
-		cryptonight_triple_hash<cryptonight_bittube2, false, false>,
-		cryptonight_triple_hash<cryptonight_bittube2, true, false>,
-		cryptonight_triple_hash<cryptonight_bittube2, false, true>,
-		cryptonight_triple_hash<cryptonight_bittube2, true, true>,
-		cryptonight_quad_hash<cryptonight_bittube2, false, false>,
-		cryptonight_quad_hash<cryptonight_bittube2, true, false>,
-		cryptonight_quad_hash<cryptonight_bittube2, false, true>,
-		cryptonight_quad_hash<cryptonight_bittube2, true, true>,
-		cryptonight_penta_hash<cryptonight_bittube2, false, false>,
-		cryptonight_penta_hash<cryptonight_bittube2, true, false>,
-		cryptonight_penta_hash<cryptonight_bittube2, false, true>,
-		cryptonight_penta_hash<cryptonight_bittube2, true, true>
-	};
-
-	std::bitset<2> digit;
-	digit.set(0, !bHaveAes);
-	digit.set(1, !bNoPrefetch);
-
-	return func_table[algv << 4 | (N-2) << 2 | digit.to_ulong()];
+	multiway_work_main<1u>(); // work_main is now just the N = 1 instantiation of the templated multiway_work_main
 }
 
 void minethd::double_work_main()
@@ -926,7 +618,7 @@ void minethd::multiway_work_main()
 
 	// start with root algorithm and switch later if fork version is reached
 	auto miner_algo = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot();
-	cn_hash_fun_multi hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo);
+	cn_hash_fun hash_fun_multi = func_multi_selector<N>(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo);
 	uint8_t version = 0;
 	size_t lastPoolId = 0;
 
@@ -961,12 +653,12 @@ void minethd::multiway_work_main()
 			if(new_version >= coinDesc.GetMiningForkVersion())
 			{
 				miner_algo = coinDesc.GetMiningAlgo();
-				hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo);
+				hash_fun_multi = func_multi_selector<N>(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo);
 			}
 			else
 			{
 				miner_algo = coinDesc.GetMiningAlgoRoot();
-				hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo);
+				hash_fun_multi = func_multi_selector<N>(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, miner_algo);
 			}
 			lastPoolId = oWork.iPoolId;
 			version = new_version;
diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp
index 2d40ce3144a381d1cadcca493fb6d9ef1f5aafa0..26478542cddad4d7cda9725450d0621c44cd73c7 100644
--- a/xmrstak/backend/cpu/minethd.hpp
+++ b/xmrstak/backend/cpu/minethd.hpp
@@ -22,7 +22,7 @@ public:
 	static std::vector<iBackend*> thread_starter(uint32_t threadOffset, miner_work& pWork);
 	static bool self_test();
 
-	typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*);
+	typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx**);
 
 	static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo);
 	static bool thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id);
@@ -30,8 +30,9 @@ public:
 	static cryptonight_ctx* minethd_alloc_ctx();
 
 private:
-	typedef void (*cn_hash_fun_multi)(const void*, size_t, void*, cryptonight_ctx**);
-	static cn_hash_fun_multi func_multi_selector(size_t N, bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo);
+
+	template<size_t N> // N is the compile-time argument forwarded to Cryptonight_hash<N> when the function table is built
+	static cn_hash_fun func_multi_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo);
 
 	minethd(miner_work& pWork, size_t iNo, int iMultiway, bool no_prefetch, int64_t affinity);
 
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp
index 88a1acc324e1484f0bc1e038a186e6dffc8948ef..486a990e3a6df02d3719c4d23505e652a6ba9e7e 100644
--- a/xmrstak/backend/nvidia/minethd.cpp
+++ b/xmrstak/backend/nvidia/minethd.cpp
@@ -300,7 +300,7 @@ void minethd::work_main()
 
 				*(uint32_t*)(bWorkBlob + 39) = foundNonce[i];
 
-				hash_fun(bWorkBlob, oWork.iWorkSize, bResult, cpu_ctx);
+				hash_fun(bWorkBlob, oWork.iWorkSize, bResult, &cpu_ctx);
 				if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget)
 					executor::inst()->push_event(ex_event(job_result(oWork.sJobID, foundNonce[i], bResult, iThreadNo, miner_algo), oWork.iPoolId));
 				else
diff --git a/xmrstak/backend/nvidia/minethd.hpp b/xmrstak/backend/nvidia/minethd.hpp
index d4ae03864a038201d15c1dc05cae4398b2fd405a..389356842b325269150b2f6d399fe91d7cc1a2ce 100644
--- a/xmrstak/backend/nvidia/minethd.hpp
+++ b/xmrstak/backend/nvidia/minethd.hpp
@@ -28,7 +28,7 @@ public:
 	static bool self_test();
 
 private:
-	typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*);
+	typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx**); // last argument is the address of a cryptonight_ctx* (work_main passes &cpu_ctx)
 
 	minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg);
 	void start_mining();