diff --git a/xmrstak/backend/nvidia/autoAdjust.hpp b/xmrstak/backend/nvidia/autoAdjust.hpp
index 6354f60f03e4a5f5afdfe37cb393bba156cc47fd..27783acd10aee5951293a7179d692c0155a02527 100644
--- a/xmrstak/backend/nvidia/autoAdjust.hpp
+++ b/xmrstak/backend/nvidia/autoAdjust.hpp
@@ -96,7 +96,7 @@ private:
 					"    \"threads\" : " + std::to_string(ctx.device_threads) + ", \"blocks\" : " + std::to_string(ctx.device_blocks) + ",\n" +
 					"    \"bfactor\" : " + std::to_string(ctx.device_bfactor) + ", \"bsleep\" :  " + std::to_string(ctx.device_bsleep) + ",\n" +
 					"    \"affine_to_cpu\" : false, \"sync_mode\" : 3,\n" +
-					"    \"comp_mode\" : true,\n" +
+					"    \"comp_mode\" : false,\n" +
 					"  },\n";
 			}
 		}
diff --git a/xmrstak/backend/nvidia/config.tpl b/xmrstak/backend/nvidia/config.tpl
index e2a76d90f1b8bb93e208c1e26c255da02b2cde5d..8803f6ff20e7af2129419f10534185a567d585de 100644
--- a/xmrstak/backend/nvidia/config.tpl
+++ b/xmrstak/backend/nvidia/config.tpl
@@ -17,7 +17,7 @@ R"===(// generated by XMRSTAK_VERSION
  *                 2 = cudaDeviceScheduleYield
  *                 3 = cudaDeviceScheduleBlockingSync (default)
  * comp_mode     - Compatibility if true it will use 64bit memory loads and if false it will use
- *                               128bit memory loads (can produce invalid results)
+ *                               256bit memory loads (can produce invalid results)
  *                               (this option has only a meaning for cryptonight_v8 and monero)
  *
  * On the first run the miner will look at your system and suggest a basic configuration that will work,
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
index 1c9c9df64bbcb73f588effde0ed3542515aea674..3dce3e4ac08f51648ec1002ee179026afa746eb8 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
@@ -343,7 +343,7 @@ __global__ void cryptonight_core_gpu_phase2_double( int threads, int bfactor, in
 			}
 		}
 		else
-			((ulong4*)myChunks)[sub] = ((ulong4*)ptr0)[sub];
+			((ulonglong4*)myChunks)[sub] = ((ulonglong4*)ptr0)[sub];
 
 		uint32_t idx1 = (idx0 & 0x30) >> 3;
 
@@ -381,7 +381,7 @@ __global__ void cryptonight_core_gpu_phase2_double( int threads, int bfactor, in
 			}
 		}
 		else
-			((ulong4*)ptr0)[sub] = ((ulong4*)myChunks)[sub];
+			((ulonglong4*)ptr0)[sub] = ((ulonglong4*)myChunks)[sub];
 
 		idx0 = shuffle<2>(sPtr, sub, cx_aes.x, 0);
 		idx1 = (idx0 & 0x30) >> 3;
@@ -396,7 +396,7 @@ __global__ void cryptonight_core_gpu_phase2_double( int threads, int bfactor, in
 			}
 		}
 		else
-			((ulong4*)myChunks)[sub] = ((ulong4*)ptr0)[sub];
+			((ulonglong4*)myChunks)[sub] = ((ulonglong4*)ptr0)[sub];
 
 		if(ALGO != cryptonight_monero_v8)
 			bx0 = cx_aes;
@@ -461,7 +461,7 @@ __global__ void cryptonight_core_gpu_phase2_double( int threads, int bfactor, in
 			}
 		}
 		else
-			((ulong4*)ptr0)[sub] = ((ulong4*)myChunks)[sub];
+			((ulonglong4*)ptr0)[sub] = ((ulonglong4*)myChunks)[sub];
 		ax0 ^= c;
 		idx0 = shuffle<2>(sPtr, sub, static_cast<uint32_t>(ax0), 0);
 	}