From de4cce9a9bf9b67dbe26098972d3c1b4e00d597f Mon Sep 17 00:00:00 2001 From: psychocrypt <psychocrypt@users.noreply.github.com> Date: Sun, 24 Sep 2017 20:03:42 +0200 Subject: [PATCH] add amd backend - add backend - add auto suggestion --- backend/amd/amd_gpu/gpu.cpp | 869 ++++++++++++++++++++++++++++++++++++ backend/amd/amd_gpu/gpu.h | 50 +++ backend/amd/autoAdjust.hpp | 113 +++++ backend/amd/config.tpl | 24 + backend/amd/jconf.cpp | 261 +++++++++++ backend/amd/jconf.h | 43 ++ backend/amd/minethd.cpp | 241 ++++++++++ backend/amd/minethd.h | 60 +++ 8 files changed, 1661 insertions(+) create mode 100644 backend/amd/amd_gpu/gpu.cpp create mode 100644 backend/amd/amd_gpu/gpu.h create mode 100644 backend/amd/autoAdjust.hpp create mode 100644 backend/amd/config.tpl create mode 100644 backend/amd/jconf.cpp create mode 100644 backend/amd/jconf.h create mode 100644 backend/amd/minethd.cpp create mode 100644 backend/amd/minethd.h diff --git a/backend/amd/amd_gpu/gpu.cpp b/backend/amd/amd_gpu/gpu.cpp new file mode 100644 index 0000000..ebf1093 --- /dev/null +++ b/backend/amd/amd_gpu/gpu.cpp @@ -0,0 +1,869 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <string.h> +#include <math.h> +#include <iostream> +#include <vector> + +#ifdef _WIN32 +#include <windows.h> +const char* sSourcePath = "opencl\\cryptonight.cl"; + +static inline void port_sleep(size_t sec) +{ + Sleep(sec * 1000); +} +#else +#include <unistd.h> +const char* sSourcePath = "opencl/cryptonight.cl"; + +static inline void port_sleep(size_t sec) +{ + sleep(sec); +} +#endif // _WIN32 + +#if 0 +static inline long long unsigned int int_port(size_t i) +{ + return i; +} +#endif + +#include "gpu.h" + +const char* err_to_str(cl_int ret) +{ + switch(ret) + { + case CL_SUCCESS: + return "CL_SUCCESS"; + case CL_DEVICE_NOT_FOUND: + return "CL_DEVICE_NOT_FOUND"; + case CL_DEVICE_NOT_AVAILABLE: + return "CL_DEVICE_NOT_AVAILABLE"; + case CL_COMPILER_NOT_AVAILABLE: + return "CL_COMPILER_NOT_AVAILABLE"; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + case CL_OUT_OF_RESOURCES: + return "CL_OUT_OF_RESOURCES"; + case CL_OUT_OF_HOST_MEMORY: + return "CL_OUT_OF_HOST_MEMORY"; + case CL_PROFILING_INFO_NOT_AVAILABLE: + return "CL_PROFILING_INFO_NOT_AVAILABLE"; + case CL_MEM_COPY_OVERLAP: + return "CL_MEM_COPY_OVERLAP"; + case CL_IMAGE_FORMAT_MISMATCH: + return "CL_IMAGE_FORMAT_MISMATCH"; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: + return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; + case CL_BUILD_PROGRAM_FAILURE: + return "CL_BUILD_PROGRAM_FAILURE"; + case CL_MAP_FAILURE: + return "CL_MAP_FAILURE"; + case CL_MISALIGNED_SUB_BUFFER_OFFSET: + return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; + case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: + return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; + case CL_COMPILE_PROGRAM_FAILURE: + return "CL_COMPILE_PROGRAM_FAILURE"; + case CL_LINKER_NOT_AVAILABLE: + return "CL_LINKER_NOT_AVAILABLE"; + case CL_LINK_PROGRAM_FAILURE: + return "CL_LINK_PROGRAM_FAILURE"; + case CL_DEVICE_PARTITION_FAILED: + return "CL_DEVICE_PARTITION_FAILED"; + case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: + return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; + case CL_INVALID_VALUE: + return "CL_INVALID_VALUE"; + case CL_INVALID_DEVICE_TYPE: + return "CL_INVALID_DEVICE_TYPE"; + case CL_INVALID_PLATFORM: + return "CL_INVALID_PLATFORM"; + case CL_INVALID_DEVICE: + return "CL_INVALID_DEVICE"; + case CL_INVALID_CONTEXT: + return "CL_INVALID_CONTEXT"; + case CL_INVALID_QUEUE_PROPERTIES: + return "CL_INVALID_QUEUE_PROPERTIES"; + case CL_INVALID_COMMAND_QUEUE: + return "CL_INVALID_COMMAND_QUEUE"; + case CL_INVALID_HOST_PTR: + return "CL_INVALID_HOST_PTR"; + case CL_INVALID_MEM_OBJECT: + return "CL_INVALID_MEM_OBJECT"; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: + return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; + case CL_INVALID_IMAGE_SIZE: + return "CL_INVALID_IMAGE_SIZE"; + case CL_INVALID_SAMPLER: + return "CL_INVALID_SAMPLER"; + case CL_INVALID_BINARY: + return "CL_INVALID_BINARY"; + case CL_INVALID_BUILD_OPTIONS: + return "CL_INVALID_BUILD_OPTIONS"; + case CL_INVALID_PROGRAM: + return "CL_INVALID_PROGRAM"; + case CL_INVALID_PROGRAM_EXECUTABLE: + return "CL_INVALID_PROGRAM_EXECUTABLE"; + case CL_INVALID_KERNEL_NAME: + return "CL_INVALID_KERNEL_NAME"; + case CL_INVALID_KERNEL_DEFINITION: + return "CL_INVALID_KERNEL_DEFINITION"; + case CL_INVALID_KERNEL: + return "CL_INVALID_KERNEL"; + case CL_INVALID_ARG_INDEX: + return "CL_INVALID_ARG_INDEX"; + case CL_INVALID_ARG_VALUE: + return "CL_INVALID_ARG_VALUE"; + case CL_INVALID_ARG_SIZE: + return "CL_INVALID_ARG_SIZE"; + case CL_INVALID_KERNEL_ARGS: + return "CL_INVALID_KERNEL_ARGS"; + case CL_INVALID_WORK_DIMENSION: + return "CL_INVALID_WORK_DIMENSION"; + case CL_INVALID_WORK_GROUP_SIZE: + return "CL_INVALID_WORK_GROUP_SIZE"; + case CL_INVALID_WORK_ITEM_SIZE: + return "CL_INVALID_WORK_ITEM_SIZE"; + case CL_INVALID_GLOBAL_OFFSET: + return "CL_INVALID_GLOBAL_OFFSET"; + case CL_INVALID_EVENT_WAIT_LIST: + return "CL_INVALID_EVENT_WAIT_LIST"; + case CL_INVALID_EVENT: + return "CL_INVALID_EVENT"; + case CL_INVALID_OPERATION: + return "CL_INVALID_OPERATION"; + case CL_INVALID_GL_OBJECT: + return "CL_INVALID_GL_OBJECT"; + case CL_INVALID_BUFFER_SIZE: + return "CL_INVALID_BUFFER_SIZE"; + case CL_INVALID_MIP_LEVEL: + return "CL_INVALID_MIP_LEVEL"; + case CL_INVALID_GLOBAL_WORK_SIZE: + return "CL_INVALID_GLOBAL_WORK_SIZE"; + case CL_INVALID_PROPERTY: + return "CL_INVALID_PROPERTY"; + case CL_INVALID_IMAGE_DESCRIPTOR: + return "CL_INVALID_IMAGE_DESCRIPTOR"; + case CL_INVALID_COMPILER_OPTIONS: + return "CL_INVALID_COMPILER_OPTIONS"; + case CL_INVALID_LINKER_OPTIONS: + return "CL_INVALID_LINKER_OPTIONS"; + case CL_INVALID_DEVICE_PARTITION_COUNT: + return "CL_INVALID_DEVICE_PARTITION_COUNT"; +#ifdef CL_VERSION_2_0 + case CL_INVALID_PIPE_SIZE: + return "CL_INVALID_PIPE_SIZE"; + case CL_INVALID_DEVICE_QUEUE: + return "CL_INVALID_DEVICE_QUEUE"; +#endif + default: + return "UNKNOWN_ERROR"; + } +} + +#if 0 +void printer::inst()->print_msg(L1,const char* fmt, ...); +void printer::inst()->print_str(const char* str); +#endif + +char* LoadTextFile(const char* filename) +{ + size_t flen; + char* out; + FILE* kernel = fopen(filename, "rb"); + + if(kernel == NULL) + return NULL; + + fseek(kernel, 0, SEEK_END); + flen = ftell(kernel); + fseek(kernel, 0, SEEK_SET); + + out = (char*)malloc(flen+1); + size_t r = fread(out, flen, 1, kernel); + fclose(kernel); + + if(r != 1) + { + free(out); + return NULL; + } + + out[flen] = '\0'; + return out; +} + +size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, char* source_code) +{ + size_t MaximumWorkSize; + cl_int ret; + + if((ret = clGetDeviceInfo(ctx->DeviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &MaximumWorkSize, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when querying a device's max worksize using clGetDeviceInfo.", err_to_str(ret)); + return ERR_OCL_API; + } + + printer::inst()->print_msg(L1,"Device %lu work size %lu / %lu.", ctx->deviceIdx, ctx->workSize, MaximumWorkSize); +#ifdef CL_VERSION_2_0 + const cl_queue_properties CommandQueueProperties[] = { 0, 0, 0 }; + ctx->CommandQueues = clCreateCommandQueueWithProperties(opencl_ctx, ctx->DeviceID, CommandQueueProperties, &ret); +#else + const cl_command_queue_properties CommandQueueProperties = { 0 }; + ctx->CommandQueues = clCreateCommandQueue(opencl_ctx, ctx->DeviceID, CommandQueueProperties, &ret); +#endif + + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateCommandQueueWithProperties.", err_to_str(ret)); + return ERR_OCL_API; + } + + ctx->InputBuffer = clCreateBuffer(opencl_ctx, CL_MEM_READ_ONLY, 88, NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create input buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + size_t g_thd = ctx->rawIntensity; + ctx->ExtraBuffers[0] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, (1 << 21) * g_thd, NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create hash scratchpads buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + ctx->ExtraBuffers[1] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, 200 * g_thd, NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create hash states buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Blake-256 branches + ctx->ExtraBuffers[2] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create Branch 0 buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Groestl-256 branches + ctx->ExtraBuffers[3] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create Branch 1 buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + // JH-256 branches + ctx->ExtraBuffers[4] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create Branch 2 buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Skein-512 branches + ctx->ExtraBuffers[5] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create Branch 3 buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Assume we may find up to 0xFF nonces in one run - it's reasonable + ctx->OutputBuffer = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100, NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create output buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + ctx->Program = clCreateProgramWithSource(opencl_ctx, 1, (const char**)&source_code, NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithSource on the contents of cryptonight.cl", err_to_str(ret)); + return ERR_OCL_API; + } + + char options[32]; + snprintf(options, sizeof(options), "-I. -DWORKSIZE=%llu", int_port(ctx->workSize)); + ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, options, NULL, NULL); + if(ret != CL_SUCCESS) + { + size_t len; + printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram.", err_to_str(ret)); + + if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for length of build log output.", err_to_str(ret)); + return ERR_OCL_API; + } + + char* BuildLog = (char*)malloc(len + 1); + BuildLog[0] = '\0'; + + if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, len, BuildLog, NULL)) != CL_SUCCESS) + { + free(BuildLog); + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for build log.", err_to_str(ret)); + return ERR_OCL_API; + } + + printer::inst()->print_str("Build log:\n"); + printer::inst()->print_str(BuildLog); + + free(BuildLog); + return ERR_OCL_API; + } + + cl_build_status status; + do + { + if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for status of build.", err_to_str(ret)); + return ERR_OCL_API; + } + port_sleep(1); + } + while(status == CL_BUILD_IN_PROGRESS); + + const char *KernelNames[] = { "cn0", "cn1", "cn2", "Blake", "Groestl", "JH", "Skein" }; + for(int i = 0; i < 7; ++i) + { + ctx->Kernels[i] = clCreateKernel(ctx->Program, KernelNames[i], &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateKernel for kernel %s.", err_to_str(ret), KernelNames[i]); + return ERR_OCL_API; + } + } + + ctx->Nonce = 0; + return 0; +} + +const cl_platform_info attributeTypes[5] = { + CL_PLATFORM_NAME, + CL_PLATFORM_VENDOR, + CL_PLATFORM_VERSION, + CL_PLATFORM_PROFILE, + CL_PLATFORM_EXTENSIONS +}; + +const char* const attributeNames[] = { + "CL_PLATFORM_NAME", + "CL_PLATFORM_VENDOR", + "CL_PLATFORM_VERSION", + "CL_PLATFORM_PROFILE", + "CL_PLATFORM_EXTENSIONS" +}; + +#define NELEMS(x) (sizeof(x) / sizeof((x)[0])) + +void PrintDeviceInfo(cl_device_id device) +{ + char queryBuffer[1024]; + int queryInt; + cl_int clError; + clError = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(queryBuffer), &queryBuffer, NULL); + printf(" CL_DEVICE_NAME: %s\n", queryBuffer); + queryBuffer[0] = '\0'; + clError = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(queryBuffer), &queryBuffer, NULL); + printf(" CL_DEVICE_VENDOR: %s\n", queryBuffer); + queryBuffer[0] = '\0'; + clError = clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(queryBuffer), &queryBuffer, NULL); + printf(" CL_DRIVER_VERSION: %s\n", queryBuffer); + queryBuffer[0] = '\0'; + clError = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(queryBuffer), &queryBuffer, NULL); + printf(" CL_DEVICE_VERSION: %s\n", queryBuffer); + queryBuffer[0] = '\0'; + clError = clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &queryInt, NULL); + printf(" CL_DEVICE_MAX_COMPUTE_UNITS: %d\n", queryInt); +} + +uint32_t getNumPlatforms() +{ + cl_uint num_platforms = 0; + cl_platform_id * platforms = NULL; + cl_int clStatus; + + // Get platform and device information + clStatus = clGetPlatformIDs(0, NULL, &num_platforms); + platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * num_platforms); + clStatus = clGetPlatformIDs(num_platforms, platforms, NULL); + + return num_platforms; +} + +std::vector<GpuContext> getAMDDevices(int index) +{ + std::vector<GpuContext> ctxVec; + cl_platform_id * platforms = NULL; + cl_int clStatus; + cl_uint num_devices; + cl_device_id *device_list = NULL; + + uint32_t numPlatforms = getNumPlatforms(); + + + platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms); + clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL); + + clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices); + device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*num_devices); + clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, num_devices, device_list, NULL); + for (int k = 0; k < num_devices; k++) { + cl_int clError; + std::vector<char> devVendorVec(1024); + clError = clGetDeviceInfo(device_list[k], CL_DEVICE_VENDOR, devVendorVec.size(), devVendorVec.data(), NULL); + std::string devVendor(devVendorVec.data()); + if( devVendor.find("Advanced Micro Devices") != std::string::npos) + { + GpuContext ctx; + ctx.deviceIdx = k; + clError = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx.computeUnits), NULL); + size_t maxMem; + clError = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &(maxMem), NULL); + clError = clGetDeviceInfo(device_list[k], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size_t), &(ctx.freeMem), NULL); + // if environment variable GPU_SINGLE_ALLOC_PERCENT is not set we can not allocate the full memory + ctx.freeMem = std::min(ctx.freeMem, maxMem); + std::vector<char> devNameVec(1024); + clError = clGetDeviceInfo(device_list[k], CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL); + ctx.name = std::string(devNameVec.data()); + printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str()); + ctx.DeviceID = device_list[k]; + ctxVec.push_back(ctx); + } + } + + + free(device_list); + free(platforms); + + return ctxVec; +} + +int getAMDPlatformIdx() +{ + + uint32_t numPlatforms = getNumPlatforms(); + + if(numPlatforms == 0) + { + printer::inst()->print_msg(L0,"WARNING: No OpenCL platform found."); + return -1; + } + cl_platform_id * platforms = NULL; + cl_int clStatus; + + platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms); + clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL); + + int platformIndex = -1; + + for (int i = 0; i < numPlatforms; i++) { + size_t infoSize; + clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 0, NULL, &infoSize); + std::vector<char> platformNameVec(infoSize); + + clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL); + std::string platformName(platformNameVec.data()); + if( platformName.find("Advanced Micro Devices") != std::string::npos) + { + platformIndex = i; + printer::inst()->print_msg(L0,"Found AMD platform index id = %i, name = %s",i , platformName.c_str()); + break; + } + } + + free(platforms); + return platformIndex; +} + +// RequestedDeviceIdxs is a list of OpenCL device indexes +// NumDevicesRequested is number of devices in RequestedDeviceIdxs list +// Returns 0 on success, -1 on stupid params, -2 on OpenCL API error +size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) +{ + + cl_context opencl_ctx; + cl_int ret; + cl_uint entries; + + if((ret = clGetPlatformIDs(0, NULL, &entries)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetPlatformIDs for number of platforms.", err_to_str(ret)); + return ERR_OCL_API; + } + + + // The number of platforms naturally is the index of the last platform plus one. + if(entries <= platform_idx) + { + printer::inst()->print_msg(L1,"Selected OpenCL platform index %d doesn't exist.", platform_idx); + return ERR_STUPID_PARAMS; + } + + + + cl_platform_id * platforms = NULL; + cl_int clStatus; + uint32_t numPlatforms = getNumPlatforms(); + + platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms); + clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL); + + size_t infoSize; + clGetPlatformInfo(platforms[platform_idx], CL_PLATFORM_VENDOR, 0, NULL, &infoSize); + std::vector<char> platformNameVec(infoSize); + clGetPlatformInfo(platforms[platform_idx], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL); + std::string platformName(platformNameVec.data()); + if( platformName.find("Advanced Micro Devices") == std::string::npos) + { + printer::inst()->print_msg(L1,"WARNING: using non AMD device: %s", platformName.c_str()); + return ERR_STUPID_PARAMS; + } + + free(platforms); + + /*MSVC skimping on devel costs by shoehorning C99 to be a subset of C++? Noooo... can't be.*/ +#ifdef __GNUC__ + cl_platform_id PlatformIDList[entries]; +#else + cl_platform_id* PlatformIDList = _alloca(entries * sizeof(cl_platform_id)); +#endif + if((ret = clGetPlatformIDs(entries, PlatformIDList, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetPlatformIDs for platform ID information.", err_to_str(ret)); + return ERR_OCL_API; + } + + if((ret = clGetDeviceIDs(PlatformIDList[platform_idx], CL_DEVICE_TYPE_GPU, 0, NULL, &entries)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetDeviceIDs for number of devices.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Same as the platform index sanity check, except we must check all requested device indexes + for(int i = 0; i < num_gpus; ++i) + { + if(entries <= ctx[i].deviceIdx) + { + printer::inst()->print_msg(L1,"Selected OpenCL device index %lu doesn't exist.\n", ctx[i].deviceIdx); + return ERR_STUPID_PARAMS; + } + } + +#ifdef __GNUC__ + cl_device_id DeviceIDList[entries]; +#else + cl_device_id* DeviceIDList = _alloca(entries * sizeof(cl_device_id)); +#endif + if((ret = clGetDeviceIDs(PlatformIDList[platform_idx], CL_DEVICE_TYPE_GPU, entries, DeviceIDList, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetDeviceIDs for device ID information.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Indexes sanity checked above +#ifdef __GNUC__ + cl_device_id TempDeviceList[num_gpus]; +#else + cl_device_id* TempDeviceList = _alloca(entries * sizeof(cl_device_id)); +#endif + for(int i = 0; i < num_gpus; ++i) + { + ctx[i].DeviceID = DeviceIDList[ctx[i].deviceIdx]; + TempDeviceList[i] = DeviceIDList[ctx[i].deviceIdx]; + } + + opencl_ctx = clCreateContext(NULL, num_gpus, TempDeviceList, NULL, NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateContext.", err_to_str(ret)); + return ERR_OCL_API; + } + + char* source_code = LoadTextFile(sSourcePath); + if(source_code == NULL) + { + printer::inst()->print_msg(L1,"Couldn't locate GPU source code file at %s.", sSourcePath); + return ERR_STUPID_PARAMS; + } + + for(int i = 0; i < num_gpus; ++i) + { + if((ret = InitOpenCLGpu(opencl_ctx, &ctx[i], source_code)) != ERR_SUCCESS) + { + free(source_code); + return ret; + } + } + free(source_code); + + return ERR_SUCCESS; +} + +size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint32_t target) +{ + cl_int ret; + + if(input_len > 84) + return ERR_STUPID_PARAMS; + + input[input_len] = 0x01; + memset(input + input_len + 1, 0, 88 - input_len - 1); + + if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->InputBuffer, CL_TRUE, 0, 88, input, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueWriteBuffer to fill input buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + if((ret = clSetKernelArg(ctx->Kernels[0], 0, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 0.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Scratchpads + if((ret = clSetKernelArg(ctx->Kernels[0], 1, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 1.", err_to_str(ret)); + return ERR_OCL_API; + } + + // States + if((ret = clSetKernelArg(ctx->Kernels[0], 2, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 2.", err_to_str(ret)); + return ERR_OCL_API; + } + + // CN2 Kernel + + // Scratchpads + if((ret = clSetKernelArg(ctx->Kernels[1], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 0.", err_to_str(ret)); + return ERR_OCL_API; + } + + // States + if((ret = clSetKernelArg(ctx->Kernels[1], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 1.", err_to_str(ret)); + return ERR_OCL_API; + } + + // CN3 Kernel + // Scratchpads + if((ret = clSetKernelArg(ctx->Kernels[2], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 0.", err_to_str(ret)); + return ERR_OCL_API; + } + + // States + if((ret = clSetKernelArg(ctx->Kernels[2], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 1.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Branch 0 + if((ret = clSetKernelArg(ctx->Kernels[2], 2, sizeof(cl_mem), ctx->ExtraBuffers + 2)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 2.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Branch 1 + if((ret = clSetKernelArg(ctx->Kernels[2], 3, sizeof(cl_mem), ctx->ExtraBuffers + 3)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 3.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Branch 2 + if((ret = clSetKernelArg(ctx->Kernels[2], 4, sizeof(cl_mem), ctx->ExtraBuffers + 4)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 4.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Branch 3 + if((ret = clSetKernelArg(ctx->Kernels[2], 5, sizeof(cl_mem), ctx->ExtraBuffers + 5)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 5.", err_to_str(ret)); + return ERR_OCL_API; + } + + for(int i = 0; i < 4; ++i) + { + // States + if((ret = clSetKernelArg(ctx->Kernels[i + 3], 0, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 0); + return ERR_OCL_API; + } + + // Nonce buffer + if((ret = clSetKernelArg(ctx->Kernels[i + 3], 1, sizeof(cl_mem), ctx->ExtraBuffers + (i + 2))) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 1); + return ERR_OCL_API; + } + + // Output + if((ret = clSetKernelArg(ctx->Kernels[i + 3], 2, sizeof(cl_mem), &ctx->OutputBuffer)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 2); + return ERR_OCL_API; + } + + // Target + if((ret = clSetKernelArg(ctx->Kernels[i + 3], 3, sizeof(cl_uint), &target)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 3); + return ERR_OCL_API; + } + } + + return ERR_SUCCESS; +} + +size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput) +{ + cl_int ret; + cl_uint zero = 0; + size_t BranchNonces[4]; + memset(BranchNonces,0,sizeof(size_t)*4); + + size_t g_thd = ctx->rawIntensity; + size_t w_size = ctx->workSize; + + for(int i = 2; i < 6; ++i) + { + if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->ExtraBuffers[i], CL_FALSE, sizeof(cl_uint) * g_thd, sizeof(cl_uint), &zero, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueWriteBuffer to zero branch buffer counter %d.", err_to_str(ret), i - 2); + return ERR_OCL_API; + } + } + + if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->OutputBuffer, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(cl_uint), &zero, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret)); + return ERR_OCL_API; + } + + clFinish(ctx->CommandQueues); + + size_t Nonce[2] = {ctx->Nonce, 1}, gthreads[2] = { g_thd, 8 }, lthreads[2] = { w_size, 8 }; + if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[0], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 0); + return ERR_OCL_API; + } + + /*for(int i = 1; i < 3; ++i) + { + if((ret = clEnqueueNDRangeKernel(*ctx->CommandQueues, ctx->Kernels[i], 1, &ctx->Nonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS) + { + Log(LOG_CRITICAL, "Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i); + return(ERR_OCL_API); + } + }*/ + + if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[1], 1, &ctx->Nonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1); + return ERR_OCL_API; + } + + if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[2], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 2); + return ERR_OCL_API; + } + + if((ret = clEnqueueReadBuffer(ctx->CommandQueues, ctx->ExtraBuffers[2], CL_FALSE, sizeof(cl_uint) * g_thd, sizeof(cl_uint), BranchNonces, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret)); + return ERR_OCL_API; + } + + if((ret = clEnqueueReadBuffer(ctx->CommandQueues, ctx->ExtraBuffers[3], CL_FALSE, sizeof(cl_uint) * g_thd, sizeof(cl_uint), BranchNonces + 1, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret)); + return ERR_OCL_API; + } + + if((ret = clEnqueueReadBuffer(ctx->CommandQueues, ctx->ExtraBuffers[4], CL_FALSE, sizeof(cl_uint) * g_thd, sizeof(cl_uint), BranchNonces + 2, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret)); + return ERR_OCL_API; + } + + if((ret = clEnqueueReadBuffer(ctx->CommandQueues, ctx->ExtraBuffers[5], CL_FALSE, sizeof(cl_uint) * g_thd, sizeof(cl_uint), BranchNonces + 3, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret)); + return ERR_OCL_API; + } + + clFinish(ctx->CommandQueues); + + for(int i = 0; i < 4; ++i) + { + if(BranchNonces[i]) + { + // Threads + if((clSetKernelArg(ctx->Kernels[i + 3], 4, sizeof(cl_ulong), BranchNonces + i)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 4); + return(ERR_OCL_API); + } + + BranchNonces[i] = ((size_t)ceil( (double)BranchNonces[i] / (double)w_size) ) * w_size; + if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[i + 3], 1, &ctx->Nonce, BranchNonces + i, &w_size, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i + 3); + return ERR_OCL_API; + } + } + } + + if((ret = clEnqueueReadBuffer(ctx->CommandQueues, ctx->OutputBuffer, CL_TRUE, 0, sizeof(cl_uint) * 0x100, HashOutput, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret)); + return ERR_OCL_API; + } + + clFinish(ctx->CommandQueues); + ctx->Nonce += g_thd; + + return ERR_SUCCESS; +} diff --git a/backend/amd/amd_gpu/gpu.h b/backend/amd/amd_gpu/gpu.h new file mode 100644 index 0000000..8a71cfa --- /dev/null +++ b/backend/amd/amd_gpu/gpu.h @@ -0,0 +1,50 @@ +#pragma once + +#if defined(__APPLE__) +#include <OpenCL/cl.h> +#else +#include <CL/cl.h> +#endif + +#include <stdint.h> +#include <vector> +#include "../../../console.h" + +#define ERR_SUCCESS (0) +#define ERR_OCL_API (2) +#define ERR_STUPID_PARAMS (1) + + + +struct GpuContext +{ + /*Input vars*/ + size_t deviceIdx; + size_t rawIntensity; + size_t workSize; + + /*Output vars*/ + cl_device_id DeviceID; + cl_command_queue CommandQueues; + cl_mem InputBuffer; + cl_mem OutputBuffer; + cl_mem ExtraBuffers[6]; + cl_program Program; + cl_kernel Kernels[7]; + size_t freeMem; + int computeUnits; + std::string name; + + size_t Nonce; + +}; + +uint32_t getNumPlatforms(); +int getAMDPlatformIdx(); +std::vector<GpuContext> getAMDDevices(int index); + +size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx); +size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint32_t target); +size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput); + + diff --git a/backend/amd/autoAdjust.hpp b/backend/amd/autoAdjust.hpp new file mode 100644 index 0000000..fd9ea94 --- /dev/null +++ b/backend/amd/autoAdjust.hpp @@ -0,0 +1,113 @@ + +#pragma once + +#include "autoAdjust.hpp" + + +#include "jconf.h" +#include "../../console.h" +#include "../../ConfigEditor.hpp" +#include "amd_gpu/gpu.h" + +#include <vector> +#include <cstdio> +#include <sstream> +#include <string> +#include <iostream> +#include <algorithm> + +#if defined(__APPLE__) +#include <OpenCL/cl.h> +#else +#include <CL/cl.h> +#endif + + +namespace xmrstak +{ +namespace amd +{ + +class autoAdjust +{ +public: + + autoAdjust() + { + + } + + /** print the adjusted values if needed + * + * Routine exit the application and print the adjusted values if needed else + * nothing is happened. + */ + bool printConfig() + { + + int platformIndex = getAMDPlatformIdx(); + + if(platformIndex == -1) + { + printer::inst()->print_msg(L0,"WARNING: No AMD OpenCL platform found. Possible driver issues or wrong vendor driver."); + return false; + } + + devVec = getAMDDevices(0); + + + int deviceCount = devVec.size(); + + if(deviceCount == 0) + return false; + + + generateThreadConfig(platformIndex); + return true; + + } + +private: + + void generateThreadConfig(const int platformIndex) + { + // load the template of the backend config into a char variable + const char *tpl = + #include "./config.tpl" + ; + + ConfigEditor configTpl{}; + configTpl.set( std::string(tpl) ); + + std::string conf; + conf += std::string("\"gpu_threads_conf\" :\n[\n"); + int i = 0; + for(auto& ctx : devVec) + { + // use 90% of available memory + size_t availableMem = (ctx.freeMem * 100u) / 110; + size_t units = ctx.computeUnits; + size_t perThread = (size_t(1u)<<21) + 224u; + size_t max_intensity = availableMem / perThread; + size_t intensity = std::min( size_t(1000u) , max_intensity ); + conf += std::string(" // gpu: ") + ctx.name + "\n"; + conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" + + " \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" + + " \"affine_to_cpu\" : false, \n" + " },\n"; + ++i; + } + conf += std::string("],\n\n"); + + configTpl.replace("PLATFORMINDEX",std::to_string(platformIndex)); + configTpl.replace("NUMGPUS",std::to_string(devVec.size())); + configTpl.replace("GPUCONFIG",conf); + configTpl.write("amd.txt"); + printer::inst()->print_msg(L0, "CPU configuration stored in file '%s'", "amd.txt"); + } + + std::vector<GpuContext> devVec; +}; + +} // namespace amd +} // namepsace xmrstak diff --git a/backend/amd/config.tpl b/backend/amd/config.tpl new file mode 100644 index 0000000..6d08730 --- /dev/null +++ b/backend/amd/config.tpl @@ -0,0 +1,24 @@ +R"===( + +/* + * Number of GPUs that you have in your system. Each GPU will get its own CPU thread. + */ +"gpu_thread_num" : NUMGPUS, + +/* + * GPU configuration. You should play around with intensity and worksize as the fastest settings will vary. + * index - GPU index number usually starts from 0 + * intensity - Number of parallel GPU threads (nothing to do with CPU threads) + * worksize - Number of local GPU threads (nothing to do with CPU threads) + * affine_to_cpu - This will affine the thread to a CPU. This can make a GPU miner play along nicer with a CPU miner. + */ + + +GPUCONFIG + +/* + * Platform index. This will be 0 unless you have different OpenCL platform - eg. AMD and Intel. + */ +"platform_index" : PLATFORMINDEX, + +)===" diff --git a/backend/amd/jconf.cpp b/backend/amd/jconf.cpp new file mode 100644 index 0000000..f8a551e --- /dev/null +++ b/backend/amd/jconf.cpp @@ -0,0 +1,261 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + + +#include "jconf.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef _WIN32 +#define strcasecmp _stricmp +#include <intrin.h> +#else +#include <cpuid.h> +#endif + +#include "../../rapidjson/document.h" +#include "../../rapidjson/error/en.h" +#include "../../jext.h" +#include "../../console.h" + +namespace xmrstak +{ +namespace amd +{ + +using namespace rapidjson; + +/* + * This enum needs to match index in oConfigValues, otherwise we will get a runtime error + */ +enum configEnum { iGpuThreadNum, aGpuThreadsConf, iPlatformIdx }; + +struct configVal { + configEnum iName; + const char* sName; + Type iType; +}; + +//Same order as in configEnum, as per comment above +configVal oConfigValues[] = { + { iGpuThreadNum, "gpu_thread_num", kNumberType }, + { aGpuThreadsConf, "gpu_threads_conf", kArrayType }, + { iPlatformIdx, "platform_index", kNumberType } +}; + +constexpr size_t iConfigCnt = (sizeof(oConfigValues)/sizeof(oConfigValues[0])); + +inline bool checkType(Type have, Type want) +{ + if(want == have) + return true; + else if(want == kTrueType && have == kFalseType) + return true; + else if(want == kFalseType && have == kTrueType) + return true; + else + return false; +} + +struct jconf::opaque_private +{ + Document jsonDoc; + const Value* configValues[iConfigCnt]; //Compile time constant + + opaque_private() + { + } +}; + +jconf* jconf::oInst = nullptr; + +jconf::jconf() +{ + prv = new opaque_private(); +} + +bool jconf::GetThreadConfig(size_t id, thd_cfg &cfg) +{ + if(id >= prv->configValues[aGpuThreadsConf]->Size()) + return false; + + const Value& oThdConf = prv->configValues[aGpuThreadsConf]->GetArray()[id]; + + if(!oThdConf.IsObject()) + return false; + + const Value *idx, *intensity, *w_size, *aff; + idx = GetObjectMember(oThdConf, "index"); + intensity = GetObjectMember(oThdConf, "intensity"); + w_size = GetObjectMember(oThdConf, "worksize"); + aff = GetObjectMember(oThdConf, "affine_to_cpu"); + + if(idx == nullptr || intensity == nullptr || w_size == nullptr || aff == nullptr) + return false; + + if(!idx->IsUint64() || !intensity->IsUint64() || !w_size->IsUint64()) + return false; + + if(!aff->IsUint64() && !aff->IsBool()) + return false; + + cfg.index = idx->GetUint64(); + cfg.intensity = intensity->GetUint64(); + cfg.w_size = w_size->GetUint64(); + + if(aff->IsNumber()) + cfg.cpu_aff = aff->GetInt64(); + else + cfg.cpu_aff = -1; + + return true; +} + +size_t jconf::GetPlatformIdx() +{ + return prv->configValues[iPlatformIdx]->GetUint64(); +} + +size_t jconf::GetThreadCount() +{ + return prv->configValues[aGpuThreadsConf]->Size(); +} + +bool jconf::parse_config(const char* sFilename) +{ + FILE * pFile; + char * buffer; + size_t flen; + + pFile = fopen(sFilename, "rb"); + if (pFile == NULL) + { + printer::inst()->print_msg(L0, "Failed to open config file %s.", sFilename); + return false; + } + + fseek(pFile,0,SEEK_END); + flen = ftell(pFile); + rewind(pFile); + + if(flen >= 64*1024) + { + fclose(pFile); + printer::inst()->print_msg(L0, "Oversized config file - %s.", sFilename); + return false; + } + + if(flen <= 16) + { + printer::inst()->print_msg(L0, "File is empty or too short - %s.", sFilename); + return false; + } + + buffer = (char*)malloc(flen + 3); + if(fread(buffer+1, flen, 1, pFile) != 1) + { + free(buffer); + fclose(pFile); + printer::inst()->print_msg(L0, "Read error while reading %s.", sFilename); + return false; + } + fclose(pFile); + + //Replace Unicode BOM with spaces - we always use UTF-8 + unsigned char* ubuffer = (unsigned char*)buffer; + if(ubuffer[1] == 0xEF && ubuffer[2] == 0xBB && ubuffer[3] == 0xBF) + { + buffer[1] = ' '; + buffer[2] = ' '; + buffer[3] = ' '; + } + + buffer[0] = '{'; + buffer[flen] = '}'; + buffer[flen + 1] = '\0'; + + prv->jsonDoc.Parse<kParseCommentsFlag|kParseTrailingCommasFlag>(buffer, flen+2); + free(buffer); + + if(prv->jsonDoc.HasParseError()) + { + printer::inst()->print_msg(L0, "JSON config parse error(offset %llu): %s", + int_port(prv->jsonDoc.GetErrorOffset()), GetParseError_En(prv->jsonDoc.GetParseError())); + return false; + } + + + if(!prv->jsonDoc.IsObject()) + { //This should never happen as we created the root ourselves + printer::inst()->print_msg(L0, "Invalid config file. No root?\n"); + return false; + } + + for(size_t i = 0; i < iConfigCnt; i++) + { + if(oConfigValues[i].iName != i) + { + printer::inst()->print_msg(L0, "Code error. oConfigValues are not in order."); + return false; + } + + prv->configValues[i] = GetObjectMember(prv->jsonDoc, oConfigValues[i].sName); + + if(prv->configValues[i] == nullptr) + { + printer::inst()->print_msg(L0, "Invalid config file. Missing value \"%s\".", oConfigValues[i].sName); + return false; + } + + if(!checkType(prv->configValues[i]->GetType(), oConfigValues[i].iType)) + { + printer::inst()->print_msg(L0, "Invalid config file. Value \"%s\" has unexpected type.", oConfigValues[i].sName); + return false; + } + } + + size_t n_thd = prv->configValues[aGpuThreadsConf]->Size(); + if(prv->configValues[iGpuThreadNum]->GetUint64() != n_thd) + { + printer::inst()->print_msg(L0, + "Invalid config file. Your GPU config array has %llu members, while you want to use %llu threads.", + int_port(n_thd), int_port(prv->configValues[iGpuThreadNum]->GetUint64())); + return false; + } + + thd_cfg c; + for(size_t i=0; i < n_thd; i++) + { + if(!GetThreadConfig(i, c)) + { + printer::inst()->print_msg(L0, "Thread %llu has invalid config.", int_port(i)); + return false; + } + } + +} + +} // namespace amd +} // namespace xmrstak diff --git a/backend/amd/jconf.h b/backend/amd/jconf.h new file mode 100644 index 0000000..463284a --- /dev/null +++ b/backend/amd/jconf.h @@ -0,0 +1,43 @@ +#pragma once +#include <stdlib.h> +#include <string> + +namespace xmrstak +{ +namespace amd +{ + +class jconf +{ +public: + static jconf* inst() + { + if (oInst == nullptr) oInst = new jconf; + return oInst; + }; + + bool parse_config(const char* sFilename = "amd.txt"); + + struct thd_cfg { + size_t index; + size_t intensity; + size_t w_size; + long long cpu_aff; + }; + + size_t GetThreadCount(); + bool GetThreadConfig(size_t id, thd_cfg &cfg); + + size_t GetPlatformIdx(); + +private: + jconf(); + static jconf* oInst; + + struct opaque_private; + opaque_private* prv; + +}; + +} // namespace amd +} // namespace xmrstak diff --git a/backend/amd/minethd.cpp b/backend/amd/minethd.cpp new file mode 100644 index 0000000..7e3c603 --- /dev/null +++ b/backend/amd/minethd.cpp @@ -0,0 +1,241 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include <assert.h> +#include <cmath> +#include <chrono> +#include <thread> + +#include "../../ConfigEditor.hpp" +#include "autoAdjust.hpp" + +#include <vector> +#include "../../console.h" +#include "../../crypto/cryptonight_aesni.h" +#include "../cpu/minethd.h" +#include "../cpu/jconf.h" + +#include "../../executor.h" +#include "minethd.h" +#include "../../jconf.h" +#include "../../crypto/cryptonight.h" +#include "amd_gpu/gpu.h" + + +namespace xmrstak +{ +namespace amd +{ + +minethd::minethd(miner_work& pWork, size_t iNo, GpuContext* ctx) +{ + oWork = pWork; + bQuit = 0; + iThreadNo = (uint8_t)iNo; + iJobNo = 0; + iHashCount = 0; + iTimestamp = 0; + pGpuCtx = ctx; + + oWorkThd = std::thread(&minethd::work_main, this); +} + +extern "C" std::vector<IBackend*>* xmrstak_start_backend(uint32_t threadOffset, miner_work& pWork) +{ + return amd::minethd::thread_starter(threadOffset, pWork); +} + + +bool minethd::init_gpus() +{ + size_t i, n = jconf::inst()->GetThreadCount(); + + printer::inst()->print_msg(L1, "Compiling code and initializing GPUs. This will take a while..."); + vGpuData.resize(n); + + jconf::thd_cfg cfg; + for(i = 0; i < n; i++) + { + jconf::inst()->GetThreadConfig(i, cfg); + vGpuData[i].deviceIdx = cfg.index; + vGpuData[i].rawIntensity = cfg.intensity; + vGpuData[i].workSize = cfg.w_size; + } + + return InitOpenCL(vGpuData.data(), n, jconf::inst()->GetPlatformIdx()) == ERR_SUCCESS; +} + +std::vector<GpuContext> minethd::vGpuData; + +std::vector<IBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_work& pWork) +{ + std::vector<IBackend*>* pvThreads = new std::vector<IBackend*>(); + + if(!ConfigEditor::file_exist("amd.txt")) + { + autoAdjust adjust; + if(!adjust.printConfig()) + return pvThreads; + } +/* + if(!ConfigEditor::file_exist("amd.txt")) + { + printer::inst()->print_msg(L0, "WARNING: missing config file 'amd.txt'"); + return pvThreads; + } +*/ + if(!jconf::inst()->parse_config()) + { + win_exit(); + } + + // \ todo get device count and exit if no opencl device + + if(!init_gpus()) + { + printer::inst()->print_msg(L1, "WARNING: AMD device not found"); + return pvThreads; + } + + size_t i, n = jconf::inst()->GetThreadCount(); + pvThreads->reserve(n); + + jconf::thd_cfg cfg; + for (i = 0; i < n; i++) + { + jconf::inst()->GetThreadConfig(i, cfg); + minethd* thd = new minethd(pWork, i + threadOffset, &vGpuData[i]); + + if(cfg.cpu_aff >= 0) + { +#if defined(__APPLE__) + printer::inst()->print_msg(L1, "WARNING on MacOS thread affinity is only advisory."); +#endif + cpu::minethd::thd_setaffinity(thd->oWorkThd.native_handle(), cfg.cpu_aff); + } + + pvThreads->push_back(thd); + if(cfg.cpu_aff >= 0) + printer::inst()->print_msg(L1, "Starting GPU thread, affinity: %d.", (int)cfg.cpu_aff); + else + printer::inst()->print_msg(L1, "Starting GPU thread, no affinity."); + } + + return pvThreads; +} + +void minethd::switch_work(miner_work& pWork) +{ + // iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work + // faster than threads can consume them. This should never happen in real life. + // Pool cant physically send jobs faster than every 250ms or so due to net latency. + + while (GlobalStates::iConsumeCnt.load(std::memory_order_seq_cst) < GlobalStates::iThreadCount) + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + GlobalStates::oGlobalWork = pWork; + GlobalStates::iConsumeCnt.store(0, std::memory_order_seq_cst); + GlobalStates::iGlobalJobNo++; +} + +void minethd::consume_work() +{ + memcpy(&oWork, &GlobalStates::oGlobalWork, sizeof(miner_work)); + iJobNo++; + GlobalStates::iConsumeCnt++; + +} + +void minethd::work_main() +{ + uint64_t iCount = 0; + + cryptonight_ctx* cpu_ctx; + cpu_ctx = cpu::minethd::minethd_alloc_ctx(); + cn_hash_fun hash_fun = cpu::minethd::func_selector(cpu::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/); + + GlobalStates::iConsumeCnt++; + + while (bQuit == 0) + { + if (oWork.bStall) + { + /* We are stalled here because the executor didn't find a job for us yet, + either because of network latency, or a socket problem. Since we are + raison d'etre of this software it us sensible to just wait until we have something*/ + + while (GlobalStates::iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + consume_work(); + continue; + } + + assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID)); + pGpuCtx->Nonce = calc_start_nonce(oWork.iResumeCnt); + uint32_t target = oWork.iTarget32; + XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target); + + while(GlobalStates::iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + { + cl_uint results[0x100]; + memset(results,0,sizeof(cl_uint)*(0x100)); + + XMRRunJob(pGpuCtx, results); + + for(size_t i = 0; i < results[0xFF]; i++) + { + uint8_t bWorkBlob[112]; + uint8_t bResult[32]; + + memcpy(bWorkBlob, oWork.bWorkBlob, oWork.iWorkSize); + memset(bResult, 0, sizeof(job_result::bResult)); + + *(uint32_t*)(bWorkBlob + 39) = results[i]; + + hash_fun(bWorkBlob, oWork.iWorkSize, bResult, cpu_ctx); + if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget) + { + std::cout<<"found AMD"<<std::endl; + executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult), oWork.iPoolId)); + } + else + std::cout<<"wrong AMD"<<std::endl; + + //executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult), oWork.iPoolId)); + } + + iCount += pGpuCtx->rawIntensity; + using namespace std::chrono; + uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count(); + iHashCount.store(iCount, std::memory_order_relaxed); + iTimestamp.store(iStamp, std::memory_order_relaxed); + std::this_thread::yield(); + } + + consume_work(); + } +} + +} // namespace amd +} // namespace xmrstak diff --git a/backend/amd/minethd.h b/backend/amd/minethd.h new file mode 100644 index 0000000..d308110 --- /dev/null +++ b/backend/amd/minethd.h @@ -0,0 +1,60 @@ +#pragma once +#include <thread> +#include <atomic> +#include "./jconf.h" +#include "../IBackend.hpp" + +#include "amd_gpu/gpu.h" + +namespace xmrstak +{ +namespace amd +{ + +class minethd : public IBackend +{ +public: + + static void switch_work(miner_work& pWork); + static std::vector<IBackend*>* thread_starter(uint32_t threadOffset, miner_work& pWork); + static bool init_gpus(); + +private: + typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*); + + minethd(miner_work& pWork, size_t iNo, GpuContext* ctx); + + // We use the top 8 bits of the nonce for thread and resume + // This allows us to resume up to 64 threads 4 times before + // we get nonce collisions + // Bottom 24 bits allow for an hour of work at 4000 H/s + inline uint32_t calc_start_nonce(uint32_t resume) + { + return reverseBits<uint32_t>(iThreadNo + GlobalStates::iThreadCount * resume); + } + + void work_main(); + void double_work_main(); + void consume_work(); + + uint64_t iJobNo; + + static miner_work oGlobalWork; + miner_work oWork; + + std::thread oWorkThd; + uint8_t iThreadNo; + + bool bQuit; + bool bNoPrefetch; + + //Mutable ptr to vector below, different for each thread + GpuContext* pGpuCtx; + + // WARNING - this vector (but not its contents) must be immutable + // once the threads are started + static std::vector<GpuContext> vGpuData; +}; + +} // namespace amd +} // namespace xmrstak -- GitLab