From de4cce9a9bf9b67dbe26098972d3c1b4e00d597f Mon Sep 17 00:00:00 2001
From: psychocrypt <psychocrypt@users.noreply.github.com>
Date: Sun, 24 Sep 2017 20:03:42 +0200
Subject: [PATCH] add amd backend

- add backend
- add auto suggestion
---
 backend/amd/amd_gpu/gpu.cpp | 869 ++++++++++++++++++++++++++++++++++++
 backend/amd/amd_gpu/gpu.h   |  50 +++
 backend/amd/autoAdjust.hpp  | 113 +++++
 backend/amd/config.tpl      |  24 +
 backend/amd/jconf.cpp       | 261 +++++++++++
 backend/amd/jconf.h         |  43 ++
 backend/amd/minethd.cpp     | 241 ++++++++++
 backend/amd/minethd.h       |  60 +++
 8 files changed, 1661 insertions(+)
 create mode 100644 backend/amd/amd_gpu/gpu.cpp
 create mode 100644 backend/amd/amd_gpu/gpu.h
 create mode 100644 backend/amd/autoAdjust.hpp
 create mode 100644 backend/amd/config.tpl
 create mode 100644 backend/amd/jconf.cpp
 create mode 100644 backend/amd/jconf.h
 create mode 100644 backend/amd/minethd.cpp
 create mode 100644 backend/amd/minethd.h

diff --git a/backend/amd/amd_gpu/gpu.cpp b/backend/amd/amd_gpu/gpu.cpp
new file mode 100644
index 0000000..ebf1093
--- /dev/null
+++ b/backend/amd/amd_gpu/gpu.cpp
@@ -0,0 +1,869 @@
+/*
+  * This program is free software: you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License as published by
+  * the Free Software Foundation, either version 3 of the License, or
+  * any later version.
+  *
+  * This program is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  * GNU General Public License for more details.
+  *
+  * You should have received a copy of the GNU General Public License
+  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+  */
+
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <iostream>
+#include <vector>
+
+#ifdef _WIN32
+#include <windows.h>
+const char* sSourcePath = "opencl\\cryptonight.cl";
+
+static inline void port_sleep(size_t sec)
+{
+	Sleep(sec * 1000);
+}
+#else
+#include <unistd.h>
+const char* sSourcePath = "opencl/cryptonight.cl";
+
+static inline void port_sleep(size_t sec)
+{
+	sleep(sec);
+}
+#endif // _WIN32
+
+#if 0
+static inline long long unsigned int int_port(size_t i)
+{
+	return i;
+}
+#endif
+
+#include "gpu.h"
+
+const char* err_to_str(cl_int ret)
+{
+	switch(ret)
+	{
+	case CL_SUCCESS:
+		return "CL_SUCCESS";
+	case CL_DEVICE_NOT_FOUND:
+		return "CL_DEVICE_NOT_FOUND";
+	case CL_DEVICE_NOT_AVAILABLE:
+		return "CL_DEVICE_NOT_AVAILABLE";
+	case CL_COMPILER_NOT_AVAILABLE:
+		return "CL_COMPILER_NOT_AVAILABLE";
+	case CL_MEM_OBJECT_ALLOCATION_FAILURE:
+		return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
+	case CL_OUT_OF_RESOURCES:
+		return "CL_OUT_OF_RESOURCES";
+	case CL_OUT_OF_HOST_MEMORY:
+		return "CL_OUT_OF_HOST_MEMORY";
+	case CL_PROFILING_INFO_NOT_AVAILABLE:
+		return "CL_PROFILING_INFO_NOT_AVAILABLE";
+	case CL_MEM_COPY_OVERLAP:
+		return "CL_MEM_COPY_OVERLAP";
+	case CL_IMAGE_FORMAT_MISMATCH:
+		return "CL_IMAGE_FORMAT_MISMATCH";
+	case CL_IMAGE_FORMAT_NOT_SUPPORTED:
+		return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
+	case CL_BUILD_PROGRAM_FAILURE:
+		return "CL_BUILD_PROGRAM_FAILURE";
+	case CL_MAP_FAILURE:
+		return "CL_MAP_FAILURE";
+	case CL_MISALIGNED_SUB_BUFFER_OFFSET:
+		return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
+	case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
+		return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
+	case CL_COMPILE_PROGRAM_FAILURE:
+		return "CL_COMPILE_PROGRAM_FAILURE";
+	case CL_LINKER_NOT_AVAILABLE:
+		return "CL_LINKER_NOT_AVAILABLE";
+	case CL_LINK_PROGRAM_FAILURE:
+		return "CL_LINK_PROGRAM_FAILURE";
+	case CL_DEVICE_PARTITION_FAILED:
+		return "CL_DEVICE_PARTITION_FAILED";
+	case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
+		return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
+	case CL_INVALID_VALUE:
+		return "CL_INVALID_VALUE";
+	case CL_INVALID_DEVICE_TYPE:
+		return "CL_INVALID_DEVICE_TYPE";
+	case CL_INVALID_PLATFORM:
+		return "CL_INVALID_PLATFORM";
+	case CL_INVALID_DEVICE:
+		return "CL_INVALID_DEVICE";
+	case CL_INVALID_CONTEXT:
+		return "CL_INVALID_CONTEXT";
+	case CL_INVALID_QUEUE_PROPERTIES:
+		return "CL_INVALID_QUEUE_PROPERTIES";
+	case CL_INVALID_COMMAND_QUEUE:
+		return "CL_INVALID_COMMAND_QUEUE";
+	case CL_INVALID_HOST_PTR:
+		return "CL_INVALID_HOST_PTR";
+	case CL_INVALID_MEM_OBJECT:
+		return "CL_INVALID_MEM_OBJECT";
+	case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
+		return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
+	case CL_INVALID_IMAGE_SIZE:
+		return "CL_INVALID_IMAGE_SIZE";
+	case CL_INVALID_SAMPLER:
+		return "CL_INVALID_SAMPLER";
+	case CL_INVALID_BINARY:
+		return "CL_INVALID_BINARY";
+	case CL_INVALID_BUILD_OPTIONS:
+		return "CL_INVALID_BUILD_OPTIONS";
+	case CL_INVALID_PROGRAM:
+		return "CL_INVALID_PROGRAM";
+	case CL_INVALID_PROGRAM_EXECUTABLE:
+		return "CL_INVALID_PROGRAM_EXECUTABLE";
+	case CL_INVALID_KERNEL_NAME:
+		return "CL_INVALID_KERNEL_NAME";
+	case CL_INVALID_KERNEL_DEFINITION:
+		return "CL_INVALID_KERNEL_DEFINITION";
+	case CL_INVALID_KERNEL:
+		return "CL_INVALID_KERNEL";
+	case CL_INVALID_ARG_INDEX:
+		return "CL_INVALID_ARG_INDEX";
+	case CL_INVALID_ARG_VALUE:
+		return "CL_INVALID_ARG_VALUE";
+	case CL_INVALID_ARG_SIZE:
+		return "CL_INVALID_ARG_SIZE";
+	case CL_INVALID_KERNEL_ARGS:
+		return "CL_INVALID_KERNEL_ARGS";
+	case CL_INVALID_WORK_DIMENSION:
+		return "CL_INVALID_WORK_DIMENSION";
+	case CL_INVALID_WORK_GROUP_SIZE:
+		return "CL_INVALID_WORK_GROUP_SIZE";
+	case CL_INVALID_WORK_ITEM_SIZE:
+		return "CL_INVALID_WORK_ITEM_SIZE";
+	case CL_INVALID_GLOBAL_OFFSET:
+		return "CL_INVALID_GLOBAL_OFFSET";
+	case CL_INVALID_EVENT_WAIT_LIST:
+		return "CL_INVALID_EVENT_WAIT_LIST";
+	case CL_INVALID_EVENT:
+		return "CL_INVALID_EVENT";
+	case CL_INVALID_OPERATION:
+		return "CL_INVALID_OPERATION";
+	case CL_INVALID_GL_OBJECT:
+		return "CL_INVALID_GL_OBJECT";
+	case CL_INVALID_BUFFER_SIZE:
+		return "CL_INVALID_BUFFER_SIZE";
+	case CL_INVALID_MIP_LEVEL:
+		return "CL_INVALID_MIP_LEVEL";
+	case CL_INVALID_GLOBAL_WORK_SIZE:
+		return "CL_INVALID_GLOBAL_WORK_SIZE";
+	case CL_INVALID_PROPERTY:
+		return "CL_INVALID_PROPERTY";
+	case CL_INVALID_IMAGE_DESCRIPTOR:
+		return "CL_INVALID_IMAGE_DESCRIPTOR";
+	case CL_INVALID_COMPILER_OPTIONS:
+		return "CL_INVALID_COMPILER_OPTIONS";
+	case CL_INVALID_LINKER_OPTIONS:
+		return "CL_INVALID_LINKER_OPTIONS";
+	case CL_INVALID_DEVICE_PARTITION_COUNT:
+		return "CL_INVALID_DEVICE_PARTITION_COUNT";
+#ifdef CL_VERSION_2_0
+	case CL_INVALID_PIPE_SIZE:
+		return "CL_INVALID_PIPE_SIZE";
+	case CL_INVALID_DEVICE_QUEUE:
+		return "CL_INVALID_DEVICE_QUEUE";
+#endif
+	default:
+		return "UNKNOWN_ERROR";
+	}
+}
+
+#if 0
+void printer::inst()->print_msg(L1,const char* fmt, ...);
+void printer::inst()->print_str(const char* str);
+#endif
+
+char* LoadTextFile(const char* filename)
+{
+	size_t flen;
+	char* out;
+	FILE* kernel = fopen(filename, "rb");
+
+	if(kernel == NULL)
+		return NULL;
+
+	fseek(kernel, 0, SEEK_END);
+	flen = ftell(kernel);
+	fseek(kernel, 0, SEEK_SET);
+
+	out = (char*)malloc(flen+1);
+	size_t r = fread(out, flen, 1, kernel);
+	fclose(kernel);
+
+	if(r != 1)
+	{
+		free(out);
+		return NULL;
+	}
+
+	out[flen] = '\0';
+	return out;
+}
+
+size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, char* source_code)
+{
+	size_t MaximumWorkSize;
+	cl_int ret;
+
+	if((ret = clGetDeviceInfo(ctx->DeviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &MaximumWorkSize, NULL)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when querying a device's max worksize using clGetDeviceInfo.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	printer::inst()->print_msg(L1,"Device %lu work size %lu / %lu.", ctx->deviceIdx, ctx->workSize, MaximumWorkSize);
+#ifdef CL_VERSION_2_0
+	const cl_queue_properties CommandQueueProperties[] = { 0, 0, 0 };
+	ctx->CommandQueues = clCreateCommandQueueWithProperties(opencl_ctx, ctx->DeviceID, CommandQueueProperties, &ret);
+#else
+	const cl_command_queue_properties CommandQueueProperties = { 0 };
+	ctx->CommandQueues = clCreateCommandQueue(opencl_ctx, ctx->DeviceID, CommandQueueProperties, &ret);
+#endif
+
+	if(ret != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clCreateCommandQueueWithProperties.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	ctx->InputBuffer = clCreateBuffer(opencl_ctx, CL_MEM_READ_ONLY, 88, NULL, &ret);
+	if(ret != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create input buffer.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	size_t g_thd = ctx->rawIntensity;
+	ctx->ExtraBuffers[0] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, (1 << 21) * g_thd, NULL, &ret);
+	if(ret != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create hash scratchpads buffer.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	ctx->ExtraBuffers[1] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, 200 * g_thd, NULL, &ret);
+	if(ret != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create hash states buffer.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// Blake-256 branches
+	ctx->ExtraBuffers[2] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), NULL, &ret);
+	if(ret != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create Branch 0 buffer.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// Groestl-256 branches
+	ctx->ExtraBuffers[3] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), NULL, &ret);
+	if(ret != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create Branch 1 buffer.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// JH-256 branches
+	ctx->ExtraBuffers[4] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), NULL, &ret);
+	if(ret != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create Branch 2 buffer.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// Skein-512 branches
+	ctx->ExtraBuffers[5] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), NULL, &ret);
+	if(ret != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create Branch 3 buffer.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// Assume we may find up to 0xFF nonces in one run - it's reasonable
+	ctx->OutputBuffer = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100, NULL, &ret);
+	if(ret != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create output buffer.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	ctx->Program = clCreateProgramWithSource(opencl_ctx, 1, (const char**)&source_code, NULL, &ret);
+	if(ret != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithSource on the contents of cryptonight.cl", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	char options[32];
+	snprintf(options, sizeof(options), "-I. -DWORKSIZE=%llu", int_port(ctx->workSize));
+	ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, options, NULL, NULL);
+	if(ret != CL_SUCCESS)
+	{
+		size_t len;
+		printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram.", err_to_str(ret));
+
+		if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len)) != CL_SUCCESS)
+		{
+			printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for length of build log output.", err_to_str(ret));
+			return ERR_OCL_API;
+		}
+
+		char* BuildLog = (char*)malloc(len + 1);
+		BuildLog[0] = '\0';
+
+		if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, len, BuildLog, NULL)) != CL_SUCCESS)
+		{
+			free(BuildLog);
+			printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for build log.", err_to_str(ret));
+			return ERR_OCL_API;
+		}
+
+		printer::inst()->print_str("Build log:\n");
+		printer::inst()->print_str(BuildLog);
+
+		free(BuildLog);
+		return ERR_OCL_API;
+	}
+
+	cl_build_status status;
+	do
+	{
+		if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL)) != CL_SUCCESS)
+		{
+			printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for status of build.", err_to_str(ret));
+			return ERR_OCL_API;
+		}
+		port_sleep(1);
+	}
+	while(status == CL_BUILD_IN_PROGRESS);
+
+	const char *KernelNames[] = { "cn0", "cn1", "cn2", "Blake", "Groestl", "JH", "Skein" };
+	for(int i = 0; i < 7; ++i)
+	{
+		ctx->Kernels[i] = clCreateKernel(ctx->Program, KernelNames[i], &ret);
+		if(ret != CL_SUCCESS)
+		{
+			printer::inst()->print_msg(L1,"Error %s when calling clCreateKernel for kernel %s.", err_to_str(ret), KernelNames[i]);
+			return ERR_OCL_API;
+		}
+	}
+
+	ctx->Nonce = 0;
+	return 0;
+}
+
+const cl_platform_info attributeTypes[5] = {
+    CL_PLATFORM_NAME,
+    CL_PLATFORM_VENDOR,
+    CL_PLATFORM_VERSION,
+    CL_PLATFORM_PROFILE,
+    CL_PLATFORM_EXTENSIONS
+};
+
+const char* const attributeNames[] = {
+    "CL_PLATFORM_NAME",
+    "CL_PLATFORM_VENDOR",
+    "CL_PLATFORM_VERSION",
+    "CL_PLATFORM_PROFILE",
+    "CL_PLATFORM_EXTENSIONS"
+};
+
+#define NELEMS(x)  (sizeof(x) / sizeof((x)[0]))
+
+void PrintDeviceInfo(cl_device_id device)
+{
+    char queryBuffer[1024];
+    int queryInt;
+    cl_int clError;
+    clError = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(queryBuffer), &queryBuffer, NULL);
+    printf("    CL_DEVICE_NAME: %s\n", queryBuffer);
+    queryBuffer[0] = '\0';
+    clError = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(queryBuffer), &queryBuffer, NULL);
+    printf("    CL_DEVICE_VENDOR: %s\n", queryBuffer);
+    queryBuffer[0] = '\0';
+    clError = clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(queryBuffer), &queryBuffer, NULL);
+    printf("    CL_DRIVER_VERSION: %s\n", queryBuffer);
+    queryBuffer[0] = '\0';
+    clError = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(queryBuffer), &queryBuffer, NULL);
+    printf("    CL_DEVICE_VERSION: %s\n", queryBuffer);
+    queryBuffer[0] = '\0';
+    clError = clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &queryInt, NULL);
+    printf("    CL_DEVICE_MAX_COMPUTE_UNITS: %d\n", queryInt);
+}
+
+uint32_t getNumPlatforms()
+{
+	cl_uint num_platforms = 0;
+	cl_platform_id * platforms = NULL;
+	cl_int clStatus;
+
+	// Get platform and device information
+	clStatus = clGetPlatformIDs(0, NULL, &num_platforms);
+	platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * num_platforms);
+	clStatus = clGetPlatformIDs(num_platforms, platforms, NULL);
+
+	return num_platforms;
+}
+
+std::vector<GpuContext> getAMDDevices(int index)
+{
+	std::vector<GpuContext> ctxVec;
+	cl_platform_id * platforms = NULL;
+	cl_int clStatus;
+	cl_uint num_devices;
+	cl_device_id *device_list = NULL;
+
+	uint32_t numPlatforms = getNumPlatforms();
+
+
+    platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms);
+    clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL);
+
+	clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
+	device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*num_devices);
+	clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, num_devices, device_list, NULL);
+	for (int k = 0; k < num_devices; k++) {
+		cl_int clError;
+		std::vector<char> devVendorVec(1024);
+		clError = clGetDeviceInfo(device_list[k], CL_DEVICE_VENDOR, devVendorVec.size(), devVendorVec.data(), NULL);
+		std::string devVendor(devVendorVec.data());
+		if( devVendor.find("Advanced Micro Devices") != std::string::npos)
+		{
+			GpuContext ctx;
+			ctx.deviceIdx = k;
+			clError = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx.computeUnits), NULL);
+			size_t maxMem;
+			clError = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &(maxMem), NULL);
+			clError = clGetDeviceInfo(device_list[k], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size_t), &(ctx.freeMem), NULL);
+			// if environment variable GPU_SINGLE_ALLOC_PERCENT is not set we can not allocate the full memory
+			ctx.freeMem = std::min(ctx.freeMem, maxMem);
+			std::vector<char> devNameVec(1024);
+			clError = clGetDeviceInfo(device_list[k], CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL);
+			ctx.name = std::string(devNameVec.data());
+			printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str());
+			ctx.DeviceID = device_list[k];
+			ctxVec.push_back(ctx);
+		}
+	}
+	
+
+	free(device_list);
+	free(platforms);
+
+	return ctxVec;
+}
+
+int getAMDPlatformIdx()
+{
+
+	uint32_t numPlatforms = getNumPlatforms();
+
+	if(numPlatforms == 0)
+	{
+		printer::inst()->print_msg(L0,"WARNING: No OpenCL platform found.");
+		return -1;
+	}
+	cl_platform_id * platforms = NULL;
+	cl_int clStatus;
+
+	platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms);
+    clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL);
+
+	int platformIndex = -1;
+
+	for (int i = 0; i < numPlatforms; i++) {
+		size_t infoSize;
+		clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 0, NULL, &infoSize);
+		std::vector<char> platformNameVec(infoSize);
+
+		clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL);
+		std::string platformName(platformNameVec.data());
+		if( platformName.find("Advanced Micro Devices") != std::string::npos)
+		{
+			platformIndex = i;
+			printer::inst()->print_msg(L0,"Found AMD platform index id = %i, name = %s",i , platformName.c_str());
+			break;
+		}
+	}
+
+	free(platforms);
+	return platformIndex;
+}
+
+// RequestedDeviceIdxs is a list of OpenCL device indexes
+// NumDevicesRequested is number of devices in RequestedDeviceIdxs list
+// Returns 0 on success, -1 on stupid params, -2 on OpenCL API error
+size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
+{
+
+	cl_context opencl_ctx;
+	cl_int ret;
+	cl_uint entries;
+
+	if((ret = clGetPlatformIDs(0, NULL, &entries)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clGetPlatformIDs for number of platforms.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+
+	// The number of platforms naturally is the index of the last platform plus one.
+	if(entries <= platform_idx)
+	{
+		printer::inst()->print_msg(L1,"Selected OpenCL platform index %d doesn't exist.", platform_idx);
+		return ERR_STUPID_PARAMS;
+	}
+
+
+
+	cl_platform_id * platforms = NULL;
+	cl_int clStatus;
+	uint32_t numPlatforms = getNumPlatforms();
+
+	platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms);
+    clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL);
+
+	size_t infoSize;
+	clGetPlatformInfo(platforms[platform_idx], CL_PLATFORM_VENDOR, 0, NULL, &infoSize);
+	std::vector<char> platformNameVec(infoSize);
+	clGetPlatformInfo(platforms[platform_idx], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL);
+	std::string platformName(platformNameVec.data());
+	if( platformName.find("Advanced Micro Devices") == std::string::npos)
+	{
+		printer::inst()->print_msg(L1,"WARNING: using non AMD device: %s", platformName.c_str());
+		return ERR_STUPID_PARAMS;
+	}
+	
+	free(platforms);
+
+	/*MSVC skimping on devel costs by shoehorning C99 to be a subset of C++? Noooo... can't be.*/
+#ifdef __GNUC__
+	cl_platform_id PlatformIDList[entries];
+#else
+	cl_platform_id* PlatformIDList = _alloca(entries * sizeof(cl_platform_id));
+#endif
+	if((ret = clGetPlatformIDs(entries, PlatformIDList, NULL)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clGetPlatformIDs for platform ID information.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	if((ret = clGetDeviceIDs(PlatformIDList[platform_idx], CL_DEVICE_TYPE_GPU, 0, NULL, &entries)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clGetDeviceIDs for number of devices.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// Same as the platform index sanity check, except we must check all requested device indexes
+	for(int i = 0; i < num_gpus; ++i)
+	{
+		if(entries <= ctx[i].deviceIdx)
+		{
+			printer::inst()->print_msg(L1,"Selected OpenCL device index %lu doesn't exist.\n", ctx[i].deviceIdx);
+			return ERR_STUPID_PARAMS;
+		}
+	}
+
+#ifdef __GNUC__
+	cl_device_id DeviceIDList[entries];
+#else
+	cl_device_id* DeviceIDList = _alloca(entries * sizeof(cl_device_id));
+#endif
+	if((ret = clGetDeviceIDs(PlatformIDList[platform_idx], CL_DEVICE_TYPE_GPU, entries, DeviceIDList, NULL)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clGetDeviceIDs for device ID information.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// Indexes sanity checked above
+#ifdef __GNUC__
+	cl_device_id TempDeviceList[num_gpus];
+#else
+	cl_device_id* TempDeviceList = _alloca(entries * sizeof(cl_device_id));
+#endif
+	for(int i = 0; i < num_gpus; ++i)
+	{
+		ctx[i].DeviceID = DeviceIDList[ctx[i].deviceIdx];
+		TempDeviceList[i] = DeviceIDList[ctx[i].deviceIdx];
+	}
+
+	opencl_ctx = clCreateContext(NULL, num_gpus, TempDeviceList, NULL, NULL, &ret);
+	if(ret != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clCreateContext.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	char* source_code = LoadTextFile(sSourcePath);
+	if(source_code == NULL)
+	{
+		printer::inst()->print_msg(L1,"Couldn't locate GPU source code file at %s.", sSourcePath);
+		return ERR_STUPID_PARAMS;
+	}
+
+	for(int i = 0; i < num_gpus; ++i)
+	{
+		if((ret = InitOpenCLGpu(opencl_ctx, &ctx[i], source_code)) != ERR_SUCCESS)
+		{
+			free(source_code);
+			return ret;
+		}
+	}
+	free(source_code);
+
+	return ERR_SUCCESS;
+}
+
+size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint32_t target)
+{
+	cl_int ret;
+
+	if(input_len > 84)
+		return ERR_STUPID_PARAMS;
+
+	input[input_len] = 0x01;
+	memset(input + input_len + 1, 0, 88 - input_len - 1);
+
+	if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->InputBuffer, CL_TRUE, 0, 88, input, 0, NULL, NULL)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clEnqueueWriteBuffer to fill input buffer.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	if((ret = clSetKernelArg(ctx->Kernels[0], 0, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 0.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// Scratchpads
+	if((ret = clSetKernelArg(ctx->Kernels[0], 1, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 1.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// States
+	if((ret = clSetKernelArg(ctx->Kernels[0], 2, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 2.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// CN2 Kernel
+
+	// Scratchpads
+	if((ret = clSetKernelArg(ctx->Kernels[1], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 0.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// States
+	if((ret = clSetKernelArg(ctx->Kernels[1], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 1.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// CN3 Kernel
+	// Scratchpads
+	if((ret = clSetKernelArg(ctx->Kernels[2], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 0.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// States
+	if((ret = clSetKernelArg(ctx->Kernels[2], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 1.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// Branch 0
+	if((ret = clSetKernelArg(ctx->Kernels[2], 2, sizeof(cl_mem), ctx->ExtraBuffers + 2)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 2.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// Branch 1
+	if((ret = clSetKernelArg(ctx->Kernels[2], 3, sizeof(cl_mem), ctx->ExtraBuffers + 3)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 3.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// Branch 2
+	if((ret = clSetKernelArg(ctx->Kernels[2], 4, sizeof(cl_mem), ctx->ExtraBuffers + 4)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 4.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	// Branch 3
+	if((ret = clSetKernelArg(ctx->Kernels[2], 5, sizeof(cl_mem), ctx->ExtraBuffers + 5)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 5.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	for(int i = 0; i < 4; ++i)
+	{
+		// States
+		if((ret = clSetKernelArg(ctx->Kernels[i + 3], 0, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
+		{
+			printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 0);
+			return ERR_OCL_API;
+		}
+
+		// Nonce buffer
+		if((ret = clSetKernelArg(ctx->Kernels[i + 3], 1, sizeof(cl_mem), ctx->ExtraBuffers + (i + 2))) != CL_SUCCESS)
+		{
+			printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 1);
+			return ERR_OCL_API;
+		}
+
+		// Output
+		if((ret = clSetKernelArg(ctx->Kernels[i + 3], 2, sizeof(cl_mem), &ctx->OutputBuffer)) != CL_SUCCESS)
+		{
+			printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 2);
+			return ERR_OCL_API;
+		}
+
+		// Target
+		if((ret = clSetKernelArg(ctx->Kernels[i + 3], 3, sizeof(cl_uint), &target)) != CL_SUCCESS)
+		{
+			printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 3);
+			return ERR_OCL_API;
+		}
+	}
+
+	return ERR_SUCCESS;
+}
+
+size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput)
+{
+	cl_int ret;
+	cl_uint zero = 0;
+	size_t BranchNonces[4];
+	memset(BranchNonces,0,sizeof(size_t)*4);
+
+	size_t g_thd = ctx->rawIntensity;
+	size_t w_size = ctx->workSize;
+
+	for(int i = 2; i < 6; ++i)
+	{
+		if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->ExtraBuffers[i], CL_FALSE, sizeof(cl_uint) * g_thd, sizeof(cl_uint), &zero, 0, NULL, NULL)) != CL_SUCCESS)
+		{
+			printer::inst()->print_msg(L1,"Error %s when calling clEnqueueWriteBuffer to zero branch buffer counter %d.", err_to_str(ret), i - 2);
+			return ERR_OCL_API;
+		}
+	}
+
+	if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->OutputBuffer, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(cl_uint), &zero, 0, NULL, NULL)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	clFinish(ctx->CommandQueues);
+
+	size_t Nonce[2] = {ctx->Nonce, 1}, gthreads[2] = { g_thd, 8 }, lthreads[2] = { w_size, 8 };
+	if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[0], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 0);
+		return ERR_OCL_API;
+	}
+
+	/*for(int i = 1; i < 3; ++i)
+	{
+		if((ret = clEnqueueNDRangeKernel(*ctx->CommandQueues, ctx->Kernels[i], 1, &ctx->Nonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
+		{
+			Log(LOG_CRITICAL, "Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i);
+			return(ERR_OCL_API);
+		}
+	}*/
+
+	if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[1], 1, &ctx->Nonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1);
+		return ERR_OCL_API;
+	}
+
+	if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[2], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 2);
+		return ERR_OCL_API;
+	}
+
+	if((ret = clEnqueueReadBuffer(ctx->CommandQueues, ctx->ExtraBuffers[2], CL_FALSE, sizeof(cl_uint) * g_thd, sizeof(cl_uint), BranchNonces, 0, NULL, NULL)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	if((ret = clEnqueueReadBuffer(ctx->CommandQueues, ctx->ExtraBuffers[3], CL_FALSE, sizeof(cl_uint) * g_thd, sizeof(cl_uint), BranchNonces + 1, 0, NULL, NULL)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	if((ret = clEnqueueReadBuffer(ctx->CommandQueues, ctx->ExtraBuffers[4], CL_FALSE, sizeof(cl_uint) * g_thd, sizeof(cl_uint), BranchNonces + 2, 0, NULL, NULL)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	if((ret = clEnqueueReadBuffer(ctx->CommandQueues, ctx->ExtraBuffers[5], CL_FALSE, sizeof(cl_uint) * g_thd, sizeof(cl_uint), BranchNonces + 3, 0, NULL, NULL)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	clFinish(ctx->CommandQueues);
+
+	for(int i = 0; i < 4; ++i)
+	{
+		if(BranchNonces[i])
+		{
+			// Threads
+			if((clSetKernelArg(ctx->Kernels[i + 3], 4, sizeof(cl_ulong), BranchNonces + i)) != CL_SUCCESS)
+			{
+				printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 4);
+				return(ERR_OCL_API);
+			}
+
+			BranchNonces[i] = ((size_t)ceil( (double)BranchNonces[i] / (double)w_size) ) * w_size;
+			if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[i + 3], 1, &ctx->Nonce, BranchNonces + i, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
+			{
+				printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i + 3);
+				return ERR_OCL_API;
+			}
+		}
+	}
+
+	if((ret = clEnqueueReadBuffer(ctx->CommandQueues, ctx->OutputBuffer, CL_TRUE, 0, sizeof(cl_uint) * 0x100, HashOutput, 0, NULL, NULL)) != CL_SUCCESS)
+	{
+		printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret));
+		return ERR_OCL_API;
+	}
+
+	clFinish(ctx->CommandQueues);
+	ctx->Nonce += g_thd;
+
+	return ERR_SUCCESS;
+}
diff --git a/backend/amd/amd_gpu/gpu.h b/backend/amd/amd_gpu/gpu.h
new file mode 100644
index 0000000..8a71cfa
--- /dev/null
+++ b/backend/amd/amd_gpu/gpu.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#if defined(__APPLE__)
+#include <OpenCL/cl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+#include <stdint.h>
+#include <vector>
+#include "../../../console.h"
+
+#define ERR_SUCCESS (0)
+#define ERR_OCL_API (2)
+#define ERR_STUPID_PARAMS (1)
+
+
+
+struct GpuContext
+{
+	/*Input vars*/
+	size_t deviceIdx;
+	size_t rawIntensity;
+	size_t workSize;
+
+	/*Output vars*/
+	cl_device_id DeviceID;
+	cl_command_queue CommandQueues;
+	cl_mem InputBuffer;
+	cl_mem OutputBuffer;
+	cl_mem ExtraBuffers[6];
+	cl_program Program;
+	cl_kernel Kernels[7];
+	size_t freeMem;
+	int computeUnits;
+	std::string name;
+
+	size_t Nonce;
+
+};
+
+uint32_t getNumPlatforms();
+int getAMDPlatformIdx();
+std::vector<GpuContext> getAMDDevices(int index);
+
+size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx);
+size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint32_t target);
+size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput);
+
+
diff --git a/backend/amd/autoAdjust.hpp b/backend/amd/autoAdjust.hpp
new file mode 100644
index 0000000..fd9ea94
--- /dev/null
+++ b/backend/amd/autoAdjust.hpp
@@ -0,0 +1,113 @@
+
+#pragma once
+
+#include "autoAdjust.hpp"
+
+
+#include "jconf.h"
+#include "../../console.h"
+#include "../../ConfigEditor.hpp"
+#include "amd_gpu/gpu.h"
+
+#include <vector>
+#include <cstdio>
+#include <sstream>
+#include <string>
+#include <iostream>
+#include  <algorithm>
+
+#if defined(__APPLE__)
+#include <OpenCL/cl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+
+namespace xmrstak
+{
+namespace amd
+{
+
+class autoAdjust
+{    
+public:
+
+    autoAdjust()
+    {
+
+    }
+
+    /** print the adjusted values if needed
+     *
+     * Routine exit the application and print the adjusted values if needed else
+     * nothing is happened.
+     */
+    bool printConfig()
+    {
+
+		int platformIndex = getAMDPlatformIdx();
+
+		if(platformIndex == -1)
+		{
+			printer::inst()->print_msg(L0,"WARNING: No AMD OpenCL platform found. Possible driver issues or wrong vendor driver.");
+			return false;
+		}
+
+		devVec = getAMDDevices(0);
+
+
+		int deviceCount = devVec.size();
+
+        if(deviceCount == 0)
+            return false;
+
+ 
+        generateThreadConfig(platformIndex);
+		return true;
+
+    }
+
+private:
+
+    void generateThreadConfig(const int platformIndex)
+    {
+		// load the template of the backend config into a char variable
+		const char *tpl =
+			#include "./config.tpl"
+		;
+
+		ConfigEditor configTpl{};
+		configTpl.set( std::string(tpl) );
+
+		std::string conf;
+		conf += std::string("\"gpu_threads_conf\" :\n[\n");
+        int i = 0;
+        for(auto& ctx : devVec)
+        {
+			// use 90% of available memory
+			size_t availableMem = (ctx.freeMem * 100u) / 110;
+			size_t units = ctx.computeUnits;
+			size_t perThread = (size_t(1u)<<21) + 224u;
+			size_t max_intensity = availableMem / perThread;
+			size_t intensity = std::min( size_t(1000u) , max_intensity );
+			conf += std::string(" // gpu: ") + ctx.name + "\n";
+            conf += std::string("  { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" +
+                "    \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" +
+                "    \"affine_to_cpu\" : false, \n"
+                "  },\n";
+            ++i;
+        }
+		conf += std::string("],\n\n");
+
+		configTpl.replace("PLATFORMINDEX",std::to_string(platformIndex));
+		configTpl.replace("NUMGPUS",std::to_string(devVec.size()));
+		configTpl.replace("GPUCONFIG",conf);
+		configTpl.write("amd.txt");
+		printer::inst()->print_msg(L0, "CPU configuration stored in file '%s'", "amd.txt");
+    }
+
+    std::vector<GpuContext> devVec;
+};
+
+} // namespace amd
+} // namepsace xmrstak
diff --git a/backend/amd/config.tpl b/backend/amd/config.tpl
new file mode 100644
index 0000000..6d08730
--- /dev/null
+++ b/backend/amd/config.tpl
@@ -0,0 +1,24 @@
+R"===(
+
+/* 
+ * Number of GPUs that you have in your system. Each GPU will get its own CPU thread.
+ */
+"gpu_thread_num" : NUMGPUS,
+
+/*
+ * GPU configuration. You should play around with intensity and worksize as the fastest settings will vary.
+ *      index    - GPU index number usually starts from 0
+ *  intensity    - Number of parallel GPU threads (nothing to do with CPU threads)
+ *   worksize    - Number of local GPU threads (nothing to do with CPU threads)
+ * affine_to_cpu - This will affine the thread to a CPU. This can make a GPU miner play along nicer with a CPU miner.
+ */
+
+
+GPUCONFIG
+
+/*
+ * Platform index. This will be 0 unless you have different OpenCL platform - eg. AMD and Intel.
+ */
+"platform_index" : PLATFORMINDEX,
+
+)==="
diff --git a/backend/amd/jconf.cpp b/backend/amd/jconf.cpp
new file mode 100644
index 0000000..f8a551e
--- /dev/null
+++ b/backend/amd/jconf.cpp
@@ -0,0 +1,261 @@
+/*
+  * This program is free software: you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License as published by
+  * the Free Software Foundation, either version 3 of the License, or
+  * any later version.
+  *
+  * This program is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  * GNU General Public License for more details.
+  *
+  * You should have received a copy of the GNU General Public License
+  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+  *
+  * Additional permission under GNU GPL version 3 section 7
+  *
+  * If you modify this Program, or any covered work, by linking or combining
+  * it with OpenSSL (or a modified version of that library), containing parts
+  * covered by the terms of OpenSSL License and SSLeay License, the licensors
+  * of this Program grant you additional permission to convey the resulting work.
+  *
+  */
+
+
+#include "jconf.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _WIN32
+#define strcasecmp _stricmp
+#include <intrin.h>
+#else
+#include <cpuid.h>
+#endif
+
+#include "../../rapidjson/document.h"
+#include "../../rapidjson/error/en.h"
+#include "../../jext.h"
+#include "../../console.h"
+
+namespace xmrstak
+{
+namespace amd
+{
+
+using namespace rapidjson;
+
+/*
+ * This enum needs to match index in oConfigValues, otherwise we will get a runtime error
+ */
+enum configEnum { iGpuThreadNum, aGpuThreadsConf, iPlatformIdx };
+
+struct configVal {
+	configEnum iName;
+	const char* sName;
+	Type iType;
+};
+
+//Same order as in configEnum, as per comment above
+configVal oConfigValues[] = {
+	{ iGpuThreadNum, "gpu_thread_num", kNumberType },
+	{ aGpuThreadsConf, "gpu_threads_conf", kArrayType },
+	{ iPlatformIdx, "platform_index", kNumberType }
+};
+
+constexpr size_t iConfigCnt = (sizeof(oConfigValues)/sizeof(oConfigValues[0]));
+
+inline bool checkType(Type have, Type want)
+{
+	if(want == have)
+		return true;
+	else if(want == kTrueType && have == kFalseType)
+		return true;
+	else if(want == kFalseType && have == kTrueType)
+		return true;
+	else
+		return false;
+}
+
+struct jconf::opaque_private
+{
+	Document jsonDoc;
+	const Value* configValues[iConfigCnt]; //Compile time constant
+
+	opaque_private()
+	{
+	}
+};
+
+jconf* jconf::oInst = nullptr;
+
+jconf::jconf()
+{
+	prv = new opaque_private();
+}
+
+bool jconf::GetThreadConfig(size_t id, thd_cfg &cfg)
+{
+	if(id >= prv->configValues[aGpuThreadsConf]->Size())
+		return false;
+
+	const Value& oThdConf = prv->configValues[aGpuThreadsConf]->GetArray()[id];
+
+	if(!oThdConf.IsObject())
+		return false;
+
+	const Value *idx, *intensity, *w_size, *aff;
+	idx = GetObjectMember(oThdConf, "index");
+	intensity = GetObjectMember(oThdConf, "intensity");
+	w_size = GetObjectMember(oThdConf, "worksize");
+	aff = GetObjectMember(oThdConf, "affine_to_cpu");
+
+	if(idx == nullptr || intensity == nullptr || w_size == nullptr || aff == nullptr)
+		return false;
+
+	if(!idx->IsUint64() || !intensity->IsUint64() || !w_size->IsUint64())
+		return false;
+
+	if(!aff->IsUint64() && !aff->IsBool())
+		return false;
+
+	cfg.index = idx->GetUint64();
+	cfg.intensity = intensity->GetUint64();
+	cfg.w_size = w_size->GetUint64();
+
+	if(aff->IsNumber())
+		cfg.cpu_aff = aff->GetInt64();
+	else
+		cfg.cpu_aff = -1;
+
+	return true;
+}
+
+size_t jconf::GetPlatformIdx()
+{
+	return prv->configValues[iPlatformIdx]->GetUint64();
+}
+
+size_t jconf::GetThreadCount()
+{
+	return prv->configValues[aGpuThreadsConf]->Size();
+}
+
+bool jconf::parse_config(const char* sFilename)
+{
+	FILE * pFile;
+	char * buffer;
+	size_t flen;
+
+	pFile = fopen(sFilename, "rb");
+	if (pFile == NULL)
+	{
+		printer::inst()->print_msg(L0, "Failed to open config file %s.", sFilename);
+		return false;
+	}
+
+	fseek(pFile,0,SEEK_END);
+	flen = ftell(pFile);
+	rewind(pFile);
+
+	if(flen >= 64*1024)
+	{
+		fclose(pFile);
+		printer::inst()->print_msg(L0, "Oversized config file - %s.", sFilename);
+		return false;
+	}
+
+	if(flen <= 16)
+	{
+		printer::inst()->print_msg(L0, "File is empty or too short - %s.", sFilename);
+		return false;
+	}
+
+	buffer = (char*)malloc(flen + 3);
+	if(fread(buffer+1, flen, 1, pFile) != 1)
+	{
+		free(buffer);
+		fclose(pFile);
+		printer::inst()->print_msg(L0, "Read error while reading %s.", sFilename);
+		return false;
+	}
+	fclose(pFile);
+
+	//Replace Unicode BOM with spaces - we always use UTF-8
+	unsigned char* ubuffer = (unsigned char*)buffer;
+	if(ubuffer[1] == 0xEF && ubuffer[2] == 0xBB && ubuffer[3] == 0xBF)
+	{
+		buffer[1] = ' ';
+		buffer[2] = ' ';
+		buffer[3] = ' ';
+	}
+
+	buffer[0] = '{';
+	buffer[flen] = '}';
+	buffer[flen + 1] = '\0';
+
+	prv->jsonDoc.Parse<kParseCommentsFlag|kParseTrailingCommasFlag>(buffer, flen+2);
+	free(buffer);
+
+	if(prv->jsonDoc.HasParseError())
+	{
+		printer::inst()->print_msg(L0, "JSON config parse error(offset %llu): %s",
+			int_port(prv->jsonDoc.GetErrorOffset()), GetParseError_En(prv->jsonDoc.GetParseError()));
+		return false;
+	}
+
+
+	if(!prv->jsonDoc.IsObject())
+	{ //This should never happen as we created the root ourselves
+		printer::inst()->print_msg(L0, "Invalid config file. No root?\n");
+		return false;
+	}
+
+	for(size_t i = 0; i < iConfigCnt; i++)
+	{
+		if(oConfigValues[i].iName != i)
+		{
+			printer::inst()->print_msg(L0, "Code error. oConfigValues are not in order.");
+			return false;
+		}
+
+		prv->configValues[i] = GetObjectMember(prv->jsonDoc, oConfigValues[i].sName);
+
+		if(prv->configValues[i] == nullptr)
+		{
+			printer::inst()->print_msg(L0, "Invalid config file. Missing value \"%s\".", oConfigValues[i].sName);
+			return false;
+		}
+
+		if(!checkType(prv->configValues[i]->GetType(), oConfigValues[i].iType))
+		{
+			printer::inst()->print_msg(L0, "Invalid config file. Value \"%s\" has unexpected type.", oConfigValues[i].sName);
+			return false;
+		}
+	}
+
+	size_t n_thd = prv->configValues[aGpuThreadsConf]->Size();
+	if(prv->configValues[iGpuThreadNum]->GetUint64() != n_thd)
+	{
+		printer::inst()->print_msg(L0,
+			"Invalid config file. Your GPU config array has %llu members, while you want to use %llu threads.",
+			int_port(n_thd), int_port(prv->configValues[iGpuThreadNum]->GetUint64()));
+		return false;
+	}
+
+	thd_cfg c;
+	for(size_t i=0; i < n_thd; i++)
+	{
+		if(!GetThreadConfig(i, c))
+		{
+			printer::inst()->print_msg(L0, "Thread %llu has invalid config.", int_port(i));
+			return false;
+		}
+	}
+
+}
+
+} // namespace amd
+} // namespace xmrstak
diff --git a/backend/amd/jconf.h b/backend/amd/jconf.h
new file mode 100644
index 0000000..463284a
--- /dev/null
+++ b/backend/amd/jconf.h
@@ -0,0 +1,43 @@
+#pragma once
+#include <stdlib.h>
+#include <string>
+
+namespace xmrstak
+{
+namespace amd
+{
+
+class jconf
+{
+public:
+	static jconf* inst()
+	{
+		if (oInst == nullptr) oInst = new jconf;
+		return oInst;
+	};
+
+	bool parse_config(const char* sFilename = "amd.txt");
+
+	struct thd_cfg {
+		size_t index;
+		size_t intensity;
+		size_t w_size;
+		long long cpu_aff;
+	};
+
+	size_t GetThreadCount();
+	bool GetThreadConfig(size_t id, thd_cfg &cfg);
+
+	size_t GetPlatformIdx();
+
+private:
+	jconf();
+	static jconf* oInst;
+
+	struct opaque_private;
+	opaque_private* prv;
+
+};
+
+} // namespace amd
+} // namespace xmrstak
diff --git a/backend/amd/minethd.cpp b/backend/amd/minethd.cpp
new file mode 100644
index 0000000..7e3c603
--- /dev/null
+++ b/backend/amd/minethd.cpp
@@ -0,0 +1,241 @@
+/*
+  * This program is free software: you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License as published by
+  * the Free Software Foundation, either version 3 of the License, or
+  * any later version.
+  *
+  * This program is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  * GNU General Public License for more details.
+  *
+  * You should have received a copy of the GNU General Public License
+  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+  *
+  * Additional permission under GNU GPL version 3 section 7
+  *
+  * If you modify this Program, or any covered work, by linking or combining
+  * it with OpenSSL (or a modified version of that library), containing parts
+  * covered by the terms of OpenSSL License and SSLeay License, the licensors
+  * of this Program grant you additional permission to convey the resulting work.
+  *
+  */
+
+#include <assert.h>
+#include <cmath>
+#include <chrono>
+#include <thread>
+
+#include "../../ConfigEditor.hpp"
+#include "autoAdjust.hpp"
+
+#include <vector>
+#include "../../console.h"
+#include "../../crypto/cryptonight_aesni.h"
+#include "../cpu/minethd.h"
+#include "../cpu/jconf.h"
+
+#include "../../executor.h"
+#include "minethd.h"
+#include "../../jconf.h"
+#include "../../crypto/cryptonight.h"
+#include "amd_gpu/gpu.h"
+
+
+namespace xmrstak
+{
+namespace amd
+{
+
+minethd::minethd(miner_work& pWork, size_t iNo, GpuContext* ctx)
+{
+	oWork = pWork;
+	bQuit = 0;
+	iThreadNo = (uint8_t)iNo;
+	iJobNo = 0;
+	iHashCount = 0;
+	iTimestamp = 0;
+	pGpuCtx = ctx;
+
+	oWorkThd = std::thread(&minethd::work_main, this);
+}
+
+extern "C"  std::vector<IBackend*>* xmrstak_start_backend(uint32_t threadOffset, miner_work& pWork)
+{
+	return amd::minethd::thread_starter(threadOffset, pWork);
+}
+
+
+bool minethd::init_gpus()
+{
+	size_t i, n = jconf::inst()->GetThreadCount();
+
+	printer::inst()->print_msg(L1, "Compiling code and initializing GPUs. This will take a while...");
+	vGpuData.resize(n);
+
+	jconf::thd_cfg cfg;
+	for(i = 0; i < n; i++)
+	{
+		jconf::inst()->GetThreadConfig(i, cfg);
+		vGpuData[i].deviceIdx = cfg.index;
+		vGpuData[i].rawIntensity = cfg.intensity;
+		vGpuData[i].workSize = cfg.w_size;
+	}
+
+	return InitOpenCL(vGpuData.data(), n, jconf::inst()->GetPlatformIdx()) == ERR_SUCCESS;
+}
+
+std::vector<GpuContext> minethd::vGpuData;
+
+std::vector<IBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_work& pWork)
+{
+	std::vector<IBackend*>* pvThreads = new std::vector<IBackend*>();
+
+	if(!ConfigEditor::file_exist("amd.txt"))
+	{
+		autoAdjust adjust;
+		if(!adjust.printConfig())
+			return pvThreads;
+	}
+/*
+	if(!ConfigEditor::file_exist("amd.txt"))
+	{
+		printer::inst()->print_msg(L0, "WARNING: missing config file 'amd.txt'");
+		return pvThreads;
+	}
+*/
+	if(!jconf::inst()->parse_config())
+	{
+		win_exit();
+	}
+
+	// \ todo get device count and exit if no opencl device 
+
+	if(!init_gpus())
+	{
+		printer::inst()->print_msg(L1, "WARNING: AMD device not found");
+		return pvThreads;
+	}
+	
+	size_t i, n = jconf::inst()->GetThreadCount();
+	pvThreads->reserve(n);
+
+	jconf::thd_cfg cfg;
+	for (i = 0; i < n; i++)
+	{
+		jconf::inst()->GetThreadConfig(i, cfg);
+		minethd* thd = new minethd(pWork, i + threadOffset, &vGpuData[i]);
+
+		if(cfg.cpu_aff >= 0)
+		{
+#if defined(__APPLE__)
+			printer::inst()->print_msg(L1, "WARNING on MacOS thread affinity is only advisory.");
+#endif
+			cpu::minethd::thd_setaffinity(thd->oWorkThd.native_handle(), cfg.cpu_aff);
+		}
+
+		pvThreads->push_back(thd);
+		if(cfg.cpu_aff >= 0)
+			printer::inst()->print_msg(L1, "Starting GPU thread, affinity: %d.", (int)cfg.cpu_aff);
+		else
+			printer::inst()->print_msg(L1, "Starting GPU thread, no affinity.");
+	}
+
+	return pvThreads;
+}
+
+void minethd::switch_work(miner_work& pWork)
+{
+	// iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work
+	// faster than threads can consume them. This should never happen in real life.
+	// Pool cant physically send jobs faster than every 250ms or so due to net latency.
+
+	while (GlobalStates::iConsumeCnt.load(std::memory_order_seq_cst) < GlobalStates::iThreadCount)
+		std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+	GlobalStates::oGlobalWork = pWork;
+	GlobalStates::iConsumeCnt.store(0, std::memory_order_seq_cst);
+	GlobalStates::iGlobalJobNo++;
+}
+
+void minethd::consume_work()
+{
+	memcpy(&oWork, &GlobalStates::oGlobalWork, sizeof(miner_work));
+	iJobNo++;
+	GlobalStates::iConsumeCnt++;
+	
+}
+
+void minethd::work_main()
+{
+	uint64_t iCount = 0;
+
+	cryptonight_ctx* cpu_ctx;
+	cpu_ctx = cpu::minethd::minethd_alloc_ctx();
+	cn_hash_fun hash_fun = cpu::minethd::func_selector(cpu::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/);
+	
+	GlobalStates::iConsumeCnt++;
+	
+	while (bQuit == 0)
+	{
+		if (oWork.bStall)
+		{
+			/*  We are stalled here because the executor didn't find a job for us yet,
+			    either because of network latency, or a socket problem. Since we are
+			    raison d'etre of this software it us sensible to just wait until we have something*/
+
+			while (GlobalStates::iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+				std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+			consume_work();
+			continue;
+		}
+
+		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
+		pGpuCtx->Nonce = calc_start_nonce(oWork.iResumeCnt);
+		uint32_t target = oWork.iTarget32;
+		XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target);
+
+		while(GlobalStates::iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+		{
+			cl_uint results[0x100];
+			memset(results,0,sizeof(cl_uint)*(0x100));
+
+			XMRRunJob(pGpuCtx, results);
+
+			for(size_t i = 0; i < results[0xFF]; i++)
+			{
+				uint8_t	bWorkBlob[112];
+				uint8_t	bResult[32];
+
+				memcpy(bWorkBlob, oWork.bWorkBlob, oWork.iWorkSize);
+				memset(bResult, 0, sizeof(job_result::bResult));
+
+				*(uint32_t*)(bWorkBlob + 39) = results[i];
+
+				hash_fun(bWorkBlob, oWork.iWorkSize, bResult, cpu_ctx);
+				if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget)
+				{
+					std::cout<<"found AMD"<<std::endl;
+					executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult), oWork.iPoolId));
+				}
+				else
+					std::cout<<"wrong AMD"<<std::endl;
+
+				//executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult), oWork.iPoolId));
+			}
+
+			iCount += pGpuCtx->rawIntensity;
+			using namespace std::chrono;
+			uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();
+			iHashCount.store(iCount, std::memory_order_relaxed);
+			iTimestamp.store(iStamp, std::memory_order_relaxed);
+			std::this_thread::yield();
+		}
+
+		consume_work();
+	}
+}
+
+} // namespace amd
+} // namespace xmrstak
diff --git a/backend/amd/minethd.h b/backend/amd/minethd.h
new file mode 100644
index 0000000..d308110
--- /dev/null
+++ b/backend/amd/minethd.h
@@ -0,0 +1,60 @@
+#pragma once
+#include <thread>
+#include <atomic>
+#include "./jconf.h"
+#include "../IBackend.hpp"
+
+#include "amd_gpu/gpu.h"
+
+namespace xmrstak
+{
+namespace amd
+{
+
+class minethd  : public IBackend
+{
+public:
+
+	static void switch_work(miner_work& pWork);
+	static std::vector<IBackend*>* thread_starter(uint32_t threadOffset, miner_work& pWork);
+	static bool init_gpus();
+
+private:
+	typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*);
+	
+	minethd(miner_work& pWork, size_t iNo, GpuContext* ctx);
+
+	// We use the top 8 bits of the nonce for thread and resume
+	// This allows us to resume up to 64 threads 4 times before
+	// we get nonce collisions
+	// Bottom 24 bits allow for an hour of work at 4000 H/s
+	inline uint32_t calc_start_nonce(uint32_t resume)
+	{
+		return reverseBits<uint32_t>(iThreadNo + GlobalStates::iThreadCount * resume);
+	}
+	
+	void work_main();
+	void double_work_main();
+	void consume_work();
+
+	uint64_t iJobNo;
+
+	static miner_work oGlobalWork;
+	miner_work oWork;
+
+	std::thread oWorkThd;
+	uint8_t iThreadNo;
+
+	bool bQuit;
+	bool bNoPrefetch;
+
+	//Mutable ptr to vector below, different for each thread
+	GpuContext* pGpuCtx;
+
+	// WARNING - this vector (but not its contents) must be immutable
+	// once the threads are started
+	static std::vector<GpuContext> vGpuData;
+};
+
+} // namespace amd
+} // namespace xmrstak
-- 
GitLab