Commit de4cce9a authored by psychocrypt

add amd backend

- add backend
- add auto suggestion
parent 926f9f03
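amd_gpu/gpu.h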
#pragma once
#if defined(__APPLE__)
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include <stdint.h>
#include <vector>
#include "../../../console.h"
#define ERR_SUCCESS (0)
#define ERR_OCL_API (2)
#define ERR_STUPID_PARAMS (1)
struct GpuContext
{
/*Input vars*/
size_t deviceIdx;
size_t rawIntensity;
size_t workSize;
/*Output vars*/
cl_device_id DeviceID;
cl_command_queue CommandQueues;
cl_mem InputBuffer;
cl_mem OutputBuffer;
cl_mem ExtraBuffers[6];
cl_program Program;
cl_kernel Kernels[7];
size_t freeMem;
int computeUnits;
std::string name;
size_t Nonce;
};
uint32_t getNumPlatforms();
int getAMDPlatformIdx();
std::vector<GpuContext> getAMDDevices(int index);
size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx);
size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint32_t target);
size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput);
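/* Typical call sequence (a sketch only, mirroring how minethd.cpp below drives this API;
 * platform_idx, blob, blob_len and target are placeholders):
 *
 *   GpuContext ctx{};
 *   ctx.deviceIdx = 0; ctx.rawIntensity = 1000; ctx.workSize = 8;
 *   if(InitOpenCL(&ctx, 1, platform_idx) == ERR_SUCCESS)
 *   {
 *       XMRSetJob(&ctx, blob, blob_len, target);
 *       cl_uint results[0x100] = {};
 *       XMRRunJob(&ctx, results); // results[0xFF] holds the number of nonces found
 *   }
 */

autoAdjust.hpp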
#pragma once
#include "autoAdjust.hpp"
#include "jconf.h"
#include "../../console.h"
#include "../../ConfigEditor.hpp"
#include "amd_gpu/gpu.h"
#include <vector>
#include <cstdio>
#include <sstream>
#include <string>
#include <iostream>
#include <algorithm>
#if defined(__APPLE__)
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
namespace xmrstak
{
namespace amd
{
class autoAdjust
{
public:
autoAdjust()
{
}
/** print the adjusted values if needed
*
* Generates and writes a suggested configuration if an AMD OpenCL platform
* and at least one device are found; otherwise nothing happens.
*/
bool printConfig()
{
int platformIndex = getAMDPlatformIdx();
if(platformIndex == -1)
{
printer::inst()->print_msg(L0,"WARNING: No AMD OpenCL platform found. Possible driver issues or wrong vendor driver.");
return false;
}
devVec = getAMDDevices(platformIndex);
int deviceCount = devVec.size();
if(deviceCount == 0)
return false;
generateThreadConfig(platformIndex);
return true;
}
private:
void generateThreadConfig(const int platformIndex)
{
// load the template of the backend config into a char variable
const char *tpl =
#include "./config.tpl"
;
ConfigEditor configTpl{};
configTpl.set( std::string(tpl) );
std::string conf;
conf += std::string("\"gpu_threads_conf\" :\n[\n");
int i = 0;
for(auto& ctx : devVec)
{
// keep roughly 10% of the reported memory free (availableMem = freeMem * 100 / 110)
size_t availableMem = (ctx.freeMem * 100u) / 110;
size_t units = ctx.computeUnits;
size_t perThread = (size_t(1u)<<21) + 224u;
size_t max_intensity = availableMem / perThread;
size_t intensity = std::min( size_t(1000u) , max_intensity );
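// Illustrative numbers only: a card reporting freeMem of 4 GiB gives availableMem of roughly
// 3.9 GiB; with perThread = 2 MiB + 224 B this allows a max_intensity of about 1861, so the
// suggested intensity is capped at 1000.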
conf += std::string(" // gpu: ") + ctx.name + "\n";
conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" +
" \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" +
" \"affine_to_cpu\" : false, \n"
" },\n";
++i;
}
conf += std::string("],\n\n");
configTpl.replace("PLATFORMINDEX",std::to_string(platformIndex));
configTpl.replace("NUMGPUS",std::to_string(devVec.size()));
configTpl.replace("GPUCONFIG",conf);
configTpl.write("amd.txt");
printer::inst()->print_msg(L0, "CPU configuration stored in file '%s'", "amd.txt");
}
std::vector<GpuContext> devVec;
};
} // namespace amd
} // namespace xmrstak
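config.tpl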
R"===(
/*
* Number of GPUs that you have in your system. Each GPU will get its own CPU thread.
*/
"gpu_thread_num" : NUMGPUS,
/*
* GPU configuration. You should play around with intensity and worksize as the fastest settings will vary.
* index - GPU index number usually starts from 0
* intensity - Number of parallel GPU threads (nothing to do with CPU threads)
* worksize - Number of local GPU threads (nothing to do with CPU threads)
* affine_to_cpu - This will affine the thread to a CPU. This can make a GPU miner play along nicer with a CPU miner.
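*
* An illustrative entry (the values are examples only; the generated file will list your GPUs):
* { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false },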
*/
GPUCONFIG
/*
* Platform index. This will be 0 unless you have more than one OpenCL platform installed - e.g. both AMD and Intel.
*/
"platform_index" : PLATFORMINDEX,
)==="
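jconf.cpp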
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Additional permission under GNU GPL version 3 section 7
*
* If you modify this Program, or any covered work, by linking or combining
* it with OpenSSL (or a modified version of that library), containing parts
* covered by the terms of OpenSSL License and SSLeay License, the licensors
* of this Program grant you additional permission to convey the resulting work.
*
*/
#include "jconf.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#define strcasecmp _stricmp
#include <intrin.h>
#else
#include <cpuid.h>
#endif
#include "../../rapidjson/document.h"
#include "../../rapidjson/error/en.h"
#include "../../jext.h"
#include "../../console.h"
namespace xmrstak
{
namespace amd
{
using namespace rapidjson;
/*
* This enum needs to match index in oConfigValues, otherwise we will get a runtime error
*/
enum configEnum { iGpuThreadNum, aGpuThreadsConf, iPlatformIdx };
struct configVal {
configEnum iName;
const char* sName;
Type iType;
};
//Same order as in configEnum, as per comment above
configVal oConfigValues[] = {
{ iGpuThreadNum, "gpu_thread_num", kNumberType },
{ aGpuThreadsConf, "gpu_threads_conf", kArrayType },
{ iPlatformIdx, "platform_index", kNumberType }
};
constexpr size_t iConfigCnt = (sizeof(oConfigValues)/sizeof(oConfigValues[0]));
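/* rapidjson represents booleans as two distinct types (kTrueType and kFalseType),
so a requested boolean matches either one. */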
inline bool checkType(Type have, Type want)
{
if(want == have)
return true;
else if(want == kTrueType && have == kFalseType)
return true;
else if(want == kFalseType && have == kTrueType)
return true;
else
return false;
}
struct jconf::opaque_private
{
Document jsonDoc;
const Value* configValues[iConfigCnt]; //Compile time constant
opaque_private()
{
}
};
jconf* jconf::oInst = nullptr;
jconf::jconf()
{
prv = new opaque_private();
}
bool jconf::GetThreadConfig(size_t id, thd_cfg &cfg)
{
if(id >= prv->configValues[aGpuThreadsConf]->Size())
return false;
const Value& oThdConf = prv->configValues[aGpuThreadsConf]->GetArray()[id];
if(!oThdConf.IsObject())
return false;
const Value *idx, *intensity, *w_size, *aff;
idx = GetObjectMember(oThdConf, "index");
intensity = GetObjectMember(oThdConf, "intensity");
w_size = GetObjectMember(oThdConf, "worksize");
aff = GetObjectMember(oThdConf, "affine_to_cpu");
if(idx == nullptr || intensity == nullptr || w_size == nullptr || aff == nullptr)
return false;
if(!idx->IsUint64() || !intensity->IsUint64() || !w_size->IsUint64())
return false;
if(!aff->IsUint64() && !aff->IsBool())
return false;
cfg.index = idx->GetUint64();
cfg.intensity = intensity->GetUint64();
cfg.w_size = w_size->GetUint64();
if(aff->IsNumber())
cfg.cpu_aff = aff->GetInt64();
else
cfg.cpu_aff = -1;
return true;
}
size_t jconf::GetPlatformIdx()
{
return prv->configValues[iPlatformIdx]->GetUint64();
}
size_t jconf::GetThreadCount()
{
return prv->configValues[aGpuThreadsConf]->Size();
}
bool jconf::parse_config(const char* sFilename)
{
FILE * pFile;
char * buffer;
size_t flen;
pFile = fopen(sFilename, "rb");
if (pFile == NULL)
{
printer::inst()->print_msg(L0, "Failed to open config file %s.", sFilename);
return false;
}
fseek(pFile,0,SEEK_END);
flen = ftell(pFile);
rewind(pFile);
if(flen >= 64*1024)
{
fclose(pFile);
printer::inst()->print_msg(L0, "Oversized config file - %s.", sFilename);
return false;
}
if(flen <= 16)
{
fclose(pFile);
printer::inst()->print_msg(L0, "File is empty or too short - %s.", sFilename);
return false;
}
buffer = (char*)malloc(flen + 3);
if(fread(buffer+1, flen, 1, pFile) != 1)
{
free(buffer);
fclose(pFile);
printer::inst()->print_msg(L0, "Read error while reading %s.", sFilename);
return false;
}
fclose(pFile);
//Replace Unicode BOM with spaces - we always use UTF-8
unsigned char* ubuffer = (unsigned char*)buffer;
if(ubuffer[1] == 0xEF && ubuffer[2] == 0xBB && ubuffer[3] == 0xBF)
{
buffer[1] = ' ';
buffer[2] = ' ';
buffer[3] = ' ';
}
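// The config file is a brace-less list of "key" : value pairs; wrap it in braces so it
// parses as one JSON object. Note that buffer[flen] replaces the file's last byte, which
// is expected to be trailing whitespace.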
buffer[0] = '{';
buffer[flen] = '}';
buffer[flen + 1] = '\0';
prv->jsonDoc.Parse<kParseCommentsFlag|kParseTrailingCommasFlag>(buffer, flen+2);
free(buffer);
if(prv->jsonDoc.HasParseError())
{
printer::inst()->print_msg(L0, "JSON config parse error(offset %llu): %s",
int_port(prv->jsonDoc.GetErrorOffset()), GetParseError_En(prv->jsonDoc.GetParseError()));
return false;
}
if(!prv->jsonDoc.IsObject())
{ //This should never happen as we created the root ourselves
printer::inst()->print_msg(L0, "Invalid config file. No root?\n");
return false;
}
for(size_t i = 0; i < iConfigCnt; i++)
{
if(oConfigValues[i].iName != i)
{
printer::inst()->print_msg(L0, "Code error. oConfigValues are not in order.");
return false;
}
prv->configValues[i] = GetObjectMember(prv->jsonDoc, oConfigValues[i].sName);
if(prv->configValues[i] == nullptr)
{
printer::inst()->print_msg(L0, "Invalid config file. Missing value \"%s\".", oConfigValues[i].sName);
return false;
}
if(!checkType(prv->configValues[i]->GetType(), oConfigValues[i].iType))
{
printer::inst()->print_msg(L0, "Invalid config file. Value \"%s\" has unexpected type.", oConfigValues[i].sName);
return false;
}
}
size_t n_thd = prv->configValues[aGpuThreadsConf]->Size();
if(prv->configValues[iGpuThreadNum]->GetUint64() != n_thd)
{
printer::inst()->print_msg(L0,
"Invalid config file. Your GPU config array has %llu members, while you want to use %llu threads.",
int_port(n_thd), int_port(prv->configValues[iGpuThreadNum]->GetUint64()));
return false;
}
thd_cfg c;
for(size_t i=0; i < n_thd; i++)
{
if(!GetThreadConfig(i, c))
{
printer::inst()->print_msg(L0, "Thread %llu has invalid config.", int_port(i));
return false;
}
}
return true;
}
} // namespace amd
} // namespace xmrstak
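jconf.h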
#pragma once
#include <stdlib.h>
#include <string>
namespace xmrstak
{
namespace amd
{
class jconf
{
public:
static jconf* inst()
{
if (oInst == nullptr) oInst = new jconf;
return oInst;
};
bool parse_config(const char* sFilename = "amd.txt");
struct thd_cfg {
size_t index;
size_t intensity;
size_t w_size;
long long cpu_aff;
};
size_t GetThreadCount();
bool GetThreadConfig(size_t id, thd_cfg &cfg);
size_t GetPlatformIdx();
private:
jconf();
static jconf* oInst;
struct opaque_private;
opaque_private* prv;
};
} // namespace amd
} // namespace xmrstak
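minethd.cpp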
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Additional permission under GNU GPL version 3 section 7
*
* If you modify this Program, or any covered work, by linking or combining
* it with OpenSSL (or a modified version of that library), containing parts
* covered by the terms of OpenSSL License and SSLeay License, the licensors
* of this Program grant you additional permission to convey the resulting work.
*
*/
#include <assert.h>
#include <cmath>
#include <chrono>
#include <thread>
#include "../../ConfigEditor.hpp"
#include "autoAdjust.hpp"
#include <vector>
#include "../../console.h"
#include "../../crypto/cryptonight_aesni.h"
#include "../cpu/minethd.h"
#include "../cpu/jconf.h"
#include "../../executor.h"
#include "minethd.h"
#include "../../jconf.h"
#include "../../crypto/cryptonight.h"
#include "amd_gpu/gpu.h"
namespace xmrstak
{
namespace amd
{
minethd::minethd(miner_work& pWork, size_t iNo, GpuContext* ctx)
{
oWork = pWork;
bQuit = 0;
iThreadNo = (uint8_t)iNo;
iJobNo = 0;
iHashCount = 0;
iTimestamp = 0;
pGpuCtx = ctx;
oWorkThd = std::thread(&minethd::work_main, this);
}
extern "C" std::vector<IBackend*>* xmrstak_start_backend(uint32_t threadOffset, miner_work& pWork)
{
return amd::minethd::thread_starter(threadOffset, pWork);
}
bool minethd::init_gpus()
{
size_t i, n = jconf::inst()->GetThreadCount();
printer::inst()->print_msg(L1, "Compiling code and initializing GPUs. This will take a while...");
vGpuData.resize(n);
jconf::thd_cfg cfg;
for(i = 0; i < n; i++)
{
jconf::inst()->GetThreadConfig(i, cfg);
vGpuData[i].deviceIdx = cfg.index;
vGpuData[i].rawIntensity = cfg.intensity;
vGpuData[i].workSize = cfg.w_size;
}
return InitOpenCL(vGpuData.data(), n, jconf::inst()->GetPlatformIdx()) == ERR_SUCCESS;
}
std::vector<GpuContext> minethd::vGpuData;
std::vector<IBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_work& pWork)
{
std::vector<IBackend*>* pvThreads = new std::vector<IBackend*>();
if(!ConfigEditor::file_exist("amd.txt"))
{
autoAdjust adjust;
if(!adjust.printConfig())
return pvThreads;
}
/*
if(!ConfigEditor::file_exist("amd.txt"))
{
printer::inst()->print_msg(L0, "WARNING: missing config file 'amd.txt'");
return pvThreads;
}
*/
if(!jconf::inst()->parse_config())
{
win_exit();
}
// \todo get the device count and exit if no OpenCL device is found
if(!init_gpus())
{
printer::inst()->print_msg(L1, "WARNING: AMD device not found");
return pvThreads;
}
size_t i, n = jconf::inst()->GetThreadCount();
pvThreads->reserve(n);
jconf::thd_cfg cfg;
for (i = 0; i < n; i++)
{
jconf::inst()->GetThreadConfig(i, cfg);
minethd* thd = new minethd(pWork, i + threadOffset, &vGpuData[i]);
if(cfg.cpu_aff >= 0)
{
#if defined(__APPLE__)
printer::inst()->print_msg(L1, "WARNING on MacOS thread affinity is only advisory.");
#endif
cpu::minethd::thd_setaffinity(thd->oWorkThd.native_handle(), cfg.cpu_aff);
}
pvThreads->push_back(thd);
if(cfg.cpu_aff >= 0)
printer::inst()->print_msg(L1, "Starting GPU thread, affinity: %d.", (int)cfg.cpu_aff);
else
printer::inst()->print_msg(L1, "Starting GPU thread, no affinity.");
}
return pvThreads;
}
void minethd::switch_work(miner_work& pWork)
{
// iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work
// faster than the threads can consume it. This should never happen in real life;
// the pool can't physically send jobs more often than every 250ms or so due to network latency.
while (GlobalStates::iConsumeCnt.load(std::memory_order_seq_cst) < GlobalStates::iThreadCount)
std::this_thread::sleep_for(std::chrono::milliseconds(100));
GlobalStates::oGlobalWork = pWork;
GlobalStates::iConsumeCnt.store(0, std::memory_order_seq_cst);
GlobalStates::iGlobalJobNo++;
}
void minethd::consume_work()
{
memcpy(&oWork, &GlobalStates::oGlobalWork, sizeof(miner_work));
iJobNo++;
GlobalStates::iConsumeCnt++;
}
void minethd::work_main()
{
uint64_t iCount = 0;
cryptonight_ctx* cpu_ctx;
cpu_ctx = cpu::minethd::minethd_alloc_ctx();
cn_hash_fun hash_fun = cpu::minethd::func_selector(cpu::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/);
GlobalStates::iConsumeCnt++;
while (bQuit == 0)
{
if (oWork.bStall)
{
/* We are stalled here because the executor hasn't found a job for us yet,
either because of network latency or a socket problem. Since we are the
raison d'etre of this software, it is sensible to just wait until we have something. */
while (GlobalStates::iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
std::this_thread::sleep_for(std::chrono::milliseconds(100));
consume_work();
continue;
}
assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
pGpuCtx->Nonce = calc_start_nonce(oWork.iResumeCnt);
uint32_t target = oWork.iTarget32;
XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target);
while(GlobalStates::iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
{
cl_uint results[0x100];
memset(results,0,sizeof(cl_uint)*(0x100));
XMRRunJob(pGpuCtx, results);
for(size_t i = 0; i < results[0xFF]; i++)
{
uint8_t bWorkBlob[112];
uint8_t bResult[32];
memcpy(bWorkBlob, oWork.bWorkBlob, oWork.iWorkSize);
memset(bResult, 0, sizeof(job_result::bResult));
*(uint32_t*)(bWorkBlob + 39) = results[i];
hash_fun(bWorkBlob, oWork.iWorkSize, bResult, cpu_ctx);
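// The 32-byte hash is interpreted as a little-endian 256-bit number, so bytes 24..31 are
// its most significant 64 bits; the share is valid when they are below the 64-bit pool target.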
if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget)
{
std::cout<<"found AMD"<<std::endl;
executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult), oWork.iPoolId));
}
else
std::cout<<"wrong AMD"<<std::endl;
//executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult), oWork.iPoolId));
}
iCount += pGpuCtx->rawIntensity;
using namespace std::chrono;
uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();
iHashCount.store(iCount, std::memory_order_relaxed);
iTimestamp.store(iStamp, std::memory_order_relaxed);
std::this_thread::yield();
}
consume_work();
}
}
} // namespace amd
} // namespace xmrstak
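minethd.h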
#pragma once
#include <thread>
#include <atomic>
#include "./jconf.h"
#include "../IBackend.hpp"
#include "amd_gpu/gpu.h"
namespace xmrstak
{
namespace amd
{
class minethd : public IBackend
{
public:
static void switch_work(miner_work& pWork);
static std::vector<IBackend*>* thread_starter(uint32_t threadOffset, miner_work& pWork);
static bool init_gpus();
private:
typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*);
minethd(miner_work& pWork, size_t iNo, GpuContext* ctx);
// We use the top 8 bits of the nonce for thread and resume
// This allows us to resume up to 64 threads 4 times before
// we get nonce collisions
// Bottom 24 bits allow for an hour of work at 4000 H/s
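// Example: with GlobalStates::iThreadCount == 2, thread 1 on its first resume (resume == 1)
// gets reverseBits<uint32_t>(1 + 2*1) = reverseBits(3) = 0xC0000000, i.e. the two top bits set.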
inline uint32_t calc_start_nonce(uint32_t resume)
{
return reverseBits<uint32_t>(iThreadNo + GlobalStates::iThreadCount * resume);
}
void work_main();
void double_work_main();
void consume_work();
uint64_t iJobNo;
static miner_work oGlobalWork;
miner_work oWork;
std::thread oWorkThd;
uint8_t iThreadNo;
bool bQuit;
bool bNoPrefetch;
//Mutable ptr to vector below, different for each thread
GpuContext* pGpuCtx;
// WARNING - this vector (but not its contents) must be immutable
// once the threads are started
static std::vector<GpuContext> vGpuData;
};
} // namespace amd
} // namespace xmrstak