From 5db8000cef4389590e8f2b8108c62ba88ffdfbd2 Mon Sep 17 00:00:00 2001
From: psychocrypt <psychocryptHPC@gmail.com>
Date: Mon, 16 Jul 2018 20:57:58 +0200
Subject: [PATCH] NVIDIA bittube2 improvement

5% more hashes per second: write the round result directly into d[x] and skip the last shuffle, which is not needed.
---
 xmrstak/backend/nvidia/nvcc_code/cuda_core.cu | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
index 39f51ea..6c64751 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
@@ -268,26 +268,27 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti
 				k[2] = shuffle<4>(sPtr,sub, k[0], sub + 2);
 				k[3] = shuffle<4>(sPtr,sub, k[0], sub + 3);
 
-				uint32_t r;
-
 				#pragma unroll 4
 				for(int i = 0; i < 4; ++i)
 				{
 					// only calculate the key if all data are up to date
 					if(i == sub)
 					{
-						r = a ^
+						d[x] = a ^
 							t_fn0( k[0] & 0xff ) ^
 							t_fn1( (k[1] >> 8) & 0xff ) ^
 							t_fn2( (k[2] >> 16) & 0xff ) ^
 							t_fn3( (k[3] >> 24 ) );
 					}
-					/* avoid negative number for modulo
-					 * load valid key (k) depending on the round
-					 */
-					k[(4 - sub + i)%4] = shuffle<4>(sPtr,sub, k[0] ^ r, i);
+					// the last shuffle is not needed
+					if(i != 3)
+					{
+						/* avoid negative number for modulo
+						 * load valid key (k) depending on the round
+						 */
+						k[(4 - sub + i)%4] = shuffle<4>(sPtr,sub, k[0] ^ d[x], i);
+					}
 				}
-				d[x] = r;
 			}
 			else
 			{
-- 
GitLab