From 1b18f598aa1190a0e6126ed2c70e052e9403d180 Mon Sep 17 00:00:00 2001
From: psychocrypt <psychocryptHPC@gmail.com>
Date: Sun, 8 Apr 2018 20:40:54 +0200
Subject: [PATCH] refactor scratchpad creation

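Size the scratchpad (and the auto-adjust memory estimates) with the larger of
the memory requirements of the algorithms used before and after the fork, so
that a single allocation is big enough for either algorithm.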
---
 xmrstak/backend/amd/amd_gpu/gpu.cpp               | 10 ++++++----
 xmrstak/backend/amd/autoAdjust.hpp                |  5 ++++-
 xmrstak/backend/cpu/autoAdjust.hpp                |  5 ++++-
 xmrstak/backend/cpu/autoAdjustHwloc.hpp           |  5 ++++-
 xmrstak/backend/cpu/crypto/cryptonight_common.cpp | 11 +++++++++--
 xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu    | 10 ++++++++--
 6 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index b9cc9b6..79e80bd 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -308,9 +308,10 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
 		return ERR_OCL_API;
 	}
 
-	size_t scratchPadSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
-	int threadMemMask = cn_select_mask(::jconf::inst()->GetMiningAlgo());
-	int hashIterations = cn_select_iter(::jconf::inst()->GetMiningAlgo());
+	size_t scratchPadSize = std::max(
+		cn_select_memory(::jconf::inst()->GetMiningAlgo()),
+		cn_select_memory(::jconf::inst()->GetMiningAlgoRoot())
+	);
 
 	size_t g_thd = ctx->rawIntensity;
 	ctx->ExtraBuffers[0] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, scratchPadSize * g_thd, NULL, &ret);
@@ -382,6 +383,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
 
 	for(int ii = 0; ii < num_algos; ++ii)
 	{
+		// scratchpad size for the selected mining algorithm
 		size_t hashMemSize = cn_select_memory(miner_algo[ii]);
 		int threadMemMask = cn_select_mask(miner_algo[ii]);
 		int hashIterations = cn_select_iter(miner_algo[ii]);
@@ -493,7 +495,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
 					p_id++;
 				}
 
-			if((ret = clGetProgramInfo(ctx->Program[ii], CL_PROGRAM_BINARIES, num_devices * sizeof(char*), all_programs.data(),NULL)) != CL_SUCCESS)
+				if((ret = clGetProgramInfo(ctx->Program[ii], CL_PROGRAM_BINARIES, num_devices * sizeof(char*), all_programs.data(),NULL)) != CL_SUCCESS)
 				{
 					printer::inst()->print_msg(L1,"Error %s when calling clGetProgramInfo.", err_to_str(ret));
 					return ERR_OCL_API;
diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp
index c798cf3..6df0eea 100644
--- a/xmrstak/backend/amd/autoAdjust.hpp
+++ b/xmrstak/backend/amd/autoAdjust.hpp
@@ -83,7 +83,10 @@ private:
 
 		constexpr size_t byteToMiB = 1024u * 1024u;
 
-		size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
+		size_t hashMemSize = std::max(
+			cn_select_memory(::jconf::inst()->GetMiningAlgo()),
+			cn_select_memory(::jconf::inst()->GetMiningAlgoRoot())
+		);
 
 		std::string conf;
 		for(auto& ctx : devVec)
diff --git a/xmrstak/backend/cpu/autoAdjust.hpp b/xmrstak/backend/cpu/autoAdjust.hpp
index abba8b6..ed96d8b 100644
--- a/xmrstak/backend/cpu/autoAdjust.hpp
+++ b/xmrstak/backend/cpu/autoAdjust.hpp
@@ -36,7 +36,10 @@ public:
 	bool printConfig()
 	{
 
-		const size_t hashMemSizeKB = cn_select_memory(::jconf::inst()->GetMiningAlgo()) / 1024u;
+		const size_t hashMemSizeKB = std::max(
+			cn_select_memory(::jconf::inst()->GetMiningAlgo()),
+			cn_select_memory(::jconf::inst()->GetMiningAlgoRoot())
+		) / 1024u;
 		const size_t halfHashMemSizeKB = hashMemSizeKB / 2u;
 
 		configEditor configTpl{};
diff --git a/xmrstak/backend/cpu/autoAdjustHwloc.hpp b/xmrstak/backend/cpu/autoAdjustHwloc.hpp
index 68d2b3f..f110ee3 100644
--- a/xmrstak/backend/cpu/autoAdjustHwloc.hpp
+++ b/xmrstak/backend/cpu/autoAdjustHwloc.hpp
@@ -28,7 +28,10 @@ public:
 
 	autoAdjust()
 	{
-		hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
+		hashMemSize = std::max(
+			cn_select_memory(::jconf::inst()->GetMiningAlgo()),
+			cn_select_memory(::jconf::inst()->GetMiningAlgoRoot())
+		);
 		halfHashMemSize = hashMemSize / 2u;
 	}
 
diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp
index 17fa24b..ac696dd 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp
+++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp
@@ -35,6 +35,7 @@ extern "C"
 #include "xmrstak/jconf.hpp"
 #include <stdio.h>
 #include <stdlib.h>
+#include <algorithm>
 
 #ifdef __GNUC__
 #include <mm_malloc.h>
@@ -202,7 +203,10 @@ size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg)
 
 cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg)
 {
-	size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
+	size_t hashMemSize = std::max(
+		cn_select_memory(::jconf::inst()->GetMiningAlgo()),
+		cn_select_memory(::jconf::inst()->GetMiningAlgoRoot())
+	);
 
 	cryptonight_ctx* ptr = (cryptonight_ctx*)_mm_malloc(sizeof(cryptonight_ctx), 4096);
 
@@ -278,7 +282,10 @@ cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, al
 
 void cryptonight_free_ctx(cryptonight_ctx* ctx)
 {
-	size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
+	size_t hashMemSize = std::max(
+		cn_select_memory(::jconf::inst()->GetMiningAlgo()),
+		cn_select_memory(::jconf::inst()->GetMiningAlgoRoot())
+	);
 
 	if(ctx->ctx_info[0] != 0)
 	{
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index ead93c5..f016ef4 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -279,7 +279,10 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
 	if(gpuArch < 70)
 		CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 
-	size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
+	size_t hashMemSize = std::max(
+		cn_select_memory(::jconf::inst()->GetMiningAlgo()),
+		cn_select_memory(::jconf::inst()->GetMiningAlgoRoot())
+	);
 
 	size_t wsize = ctx->device_blocks * ctx->device_threads;
 	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state, 50 * sizeof(uint32_t) * wsize));
@@ -576,7 +579,10 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 		ctx->total_device_memory = totalMemory;
 		ctx->free_device_memory = freeMemory;
 
-		size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
+		size_t hashMemSize = std::max(
+			cn_select_memory(::jconf::inst()->GetMiningAlgo()),
+			cn_select_memory(::jconf::inst()->GetMiningAlgoRoot())
+		);
 
 #ifdef WIN32
 		/* We use in windows bfactor (split slow kernel into smaller parts) to avoid
-- 
GitLab