diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
index 7d0ad1818afd61c92daf8b86a9079b4d8ca05d91..286bc39b6f841cc4c1b863442cdb9a4acff5904b 100644
--- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
+++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
@@ -718,7 +718,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
 #elif(ALGO==11)
 			SCRATCHPAD_CHUNK(0) = b_x[0] ^ ((uint4 *)c)[0];
 #	ifdef __NV_CL_C_VERSION
-			// flush shuffeled data
+			// flush shuffled data
 			SCRATCHPAD_CHUNK_GLOBAL = *scratchpad_line;
  			idx0 = c[0] & MASK;
  			idxS = idx0 & 0x30;
@@ -786,7 +786,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
 // cryptonight_monero_v8
 #if (ALGO == 11)
 #	if defined(__NV_CL_C_VERSION)
-			// flush shuffeled data
+			// flush shuffled data
 			SCRATCHPAD_CHUNK_GLOBAL = *scratchpad_line;
 #	endif
 			b_x[1] = b_x[0];
diff --git a/xmrstak/backend/amd/config.tpl b/xmrstak/backend/amd/config.tpl
index 63106bcb93550cfb5726893a16b66a520a5aaf16..043b05355dd296d4a59b8784ff1848d71aa9754a 100644
--- a/xmrstak/backend/amd/config.tpl
+++ b/xmrstak/backend/amd/config.tpl
@@ -9,12 +9,12 @@ R"===(
  *                 2 = chunked memory, chunk size is controlled by 'mem_chunk'
  *                     required: intensity must be a multiple of worksize
  *                 1 or true  = use 16byte contiguous memory per thread, the next memory block has offset of intensity blocks
- *                             (not allowed for cryptonight_v8 ans monero8)
+ *                             (not allowed for cryptonight_v8 and monero8)
  *                 0 or false = use a contiguous block of memory per thread
  * mem_chunk     - range 0 to 18: set the number of elements (16byte) per chunk
  *                 this value is only used if 'strided_index' == 2
  *                 element count is computed with the equation: 2 to the power of 'mem_chunk' e.g. 4 means a chunk of 16 elements(256byte)
- * unroll        - allow to control how often the POW main loop is unrolled; valid range [0;128]
+ * unroll        - controls how often the POW main loop is unrolled; valid range [0;128) - for most OpenCL implementations it must be a power of two.
  * comp_mode     - Compatibility enable/disable the automatic guard around compute kernel which allows
  *                 to use a intensity which is not the multiple of the worksize.
  *                 If you set false and the intensity is not multiple of the worksize the miner can crash:
diff --git a/xmrstak/backend/amd/jconf.cpp b/xmrstak/backend/amd/jconf.cpp
index cd2486973b216d658c9fe79a06e0f11ac1ca927a..777dbdbb5f73d6865c51f8fc396ef30f1b180d94 100644
--- a/xmrstak/backend/amd/jconf.cpp
+++ b/xmrstak/backend/amd/jconf.cpp
@@ -151,9 +151,9 @@ bool jconf::GetThreadConfig(size_t id, thd_cfg &cfg)
 
 	cfg.memChunk = (int)memChunk->GetInt64();
 	
-	if(!unroll->IsUint64() || (int)unroll->GetInt64() >= 128 )
+	if(!unroll->IsUint64() || (int)unroll->GetInt64() >= 128 || (unroll->GetInt64() & (unroll->GetInt64() - 1)) != 0) // x & (x - 1) is zero only for 0 and powers of two
 	{
-		printer::inst()->print_msg(L0, "ERROR: unroll must be smaller than 128");
+		printer::inst()->print_msg(L0, "ERROR: unroll must be smaller than 128 and a power of two");
 		return false;
 	}
 	cfg.unroll = (int)unroll->GetInt64();
diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S
index 3aa8994ddce271ba4930a1497957149dd6007fe6..b6be9438f65c19d9e305419982184d845d9c81c3 100644
--- a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S
+++ b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S
@@ -14,7 +14,7 @@ ALIGN 8
 FN_PREFIX(cryptonight_v8_mainloop_ivybridge_asm):
 	sub rsp, 48
 	mov rcx, rdi
-        #include "cryptonight_v8_main_loop_ivybridge_linux.inc"
+	#include "cryptonight_v8_main_loop_ivybridge_linux.inc"
 	add rsp, 48
 	ret 0
 
@@ -22,6 +22,6 @@ ALIGN 8
 FN_PREFIX(cryptonight_v8_mainloop_ryzen_asm):
 	sub rsp, 48
 	mov rcx, rdi
-        #include "cryptonight_v8_main_loop_ryzen_linux.inc"
+	#include "cryptonight_v8_main_loop_ryzen_linux.inc"
 	add rsp, 48
 	ret 0
diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm
index 3c2bba619045348ed6af14a7393ec14853860d68..a1615e9bd5a630e403582f43bbadea86633b6ef2 100644
--- a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm
+++ b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm
@@ -4,13 +4,13 @@ PUBLIC cryptonight_v8_mainloop_ryzen_asm
 
 ALIGN 8
 cryptonight_v8_mainloop_ivybridge_asm PROC
-        INCLUDE cryptonight_v8_main_loop_ivybridge_win64.inc
+	INCLUDE cryptonight_v8_main_loop_ivybridge_win64.inc
 	ret 0
 cryptonight_v8_mainloop_ivybridge_asm ENDP
 
 ALIGN 8
 cryptonight_v8_mainloop_ryzen_asm PROC
-        INCLUDE cryptonight_v8_main_loop_ryzen_win64.inc
+	INCLUDE cryptonight_v8_main_loop_ryzen_win64.inc
 	ret 0
 cryptonight_v8_mainloop_ryzen_asm ENDP
 
diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ivybridge_linux.inc b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ivybridge_linux.inc
index 23f6cc06069e436731c96055584a0c94eebca570..21f1f48c34ecde27a014c57a8ab3e8061f013d11 100644
--- a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ivybridge_linux.inc
+++ b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ivybridge_linux.inc
@@ -157,14 +157,15 @@ sqrt_fixup_ivybridge_ret:
 
 sqrt_fixup_ivybridge:
 	dec	 rdx
-	movq r13, -4389456576512
+	mov	r13d, -1022
+	shl	r13, 32	/* r13 = -1022 << 32 = -4389456576512, the constant the removed movq loaded */
 	mov	 rax, rdx
 	shr	 rdx, 19
 	shr	 rax, 20
 	mov	 rcx, rdx
 	sub	 rcx, rax
 	add	 rax, r13
-	movq r13, 4389456576511
+	not	r13
 	sub	 rcx, r13
 	mov	 r13d, -2147483647
 	imul	 rcx, rax
diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_linux.inc b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_linux.inc
index 551ee85734e032268b8415fc889b74bb1fefcf3b..9c177b85aeef77cc50a356b187b536dcbae4f27c 100644
--- a/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_linux.inc
+++ b/xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop_ryzen_linux.inc
@@ -157,14 +157,14 @@ sqrt_fixup_ryzen_ret:
 sqrt_fixup_ryzen:
 	movq r9, xmm2
 	dec	rdi
-	movq rdx, 4389456576511
+	mov	edx, -1022
+	shl	rdx, 32	/* rdx = -1022 << 32 = -4389456576512; the lea below adds rdx + 1, i.e. subtracts 4389456576511 */
 	mov	rax, rdi
 	shr	rdi, 19
 	shr	rax, 20
 	mov	rcx, rdi
 	sub	rcx, rax
-	sub	rcx, rdx
-	movq rdx, -4389456576512
+	lea	rcx, [rcx+rdx+1]
 	add	rax, rdx
 	imul	rcx, rax
 	sub	rcx, r9
diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
index 844e4c045ef28c61d8c69292b3a87374e1b49e2c..6edae905ee12a1f8060ba08108a15dd06a036def 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
+++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
@@ -441,8 +441,8 @@ inline uint64_t int_sqrt33_1_double_precision(const uint64_t n0)
 	_addcarry_u64(_subborrow_u64(0, x2, n0, (unsigned long long int*)&x2), r, 0, (unsigned long long int*)&r);
 #else
 	// GCC versions prior to 7 don't generate correct assembly for _subborrow_u64 -> _addcarry_u64 sequence
- 	// Fallback to simpler code
- 	if (x2 < n0) ++r;
+	// Fallback to simpler code
+	if (x2 < n0) ++r;
 #endif
 	return r;
 }
@@ -733,7 +733,7 @@ inline void set_float_rounding_mode()
 /** add append n to all arguments and keeps n as first argument
  *
  * @param n number which is appended to the arguments (expect the first argument n)
- * 
+ *
  * @code{.cpp}
  * CN_ENUM_2(1, foo, bar)
  * // is transformed to
diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp
index 2f01d5e9096f6b62f079a1acb98f8c148859213b..05743ae922134649e7ffec347ef88fb71010e7fa 100644
--- a/xmrstak/backend/cpu/minethd.cpp
+++ b/xmrstak/backend/cpu/minethd.cpp
@@ -453,7 +453,7 @@ template<size_t N>
 minethd::cn_hash_fun minethd::func_multi_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo, const std::string& asm_version_str)
 {
 	static_assert(N >= 1, "number of threads must be >= 1" );
-	
+
 	// check for asm optimized version for cryptonight_v8
 	if(N == 1 && algo == cryptonight_monero_v8 && bHaveAes)
 	{