From 3426e1858453f4970785aef7bcd462c94fc9eb99 Mon Sep 17 00:00:00 2001 From: psychocrypt <psychocryptHPC@gmail.com> Date: Tue, 5 Feb 2019 22:36:37 +0100 Subject: [PATCH] refactor POW definition A POW is now defined by a function `f` and three degrees of freedom `f(iteration, scratchpad, mask)`. `f` is the base algorithm like `cryptonight, cryptonight_gpu` An easy to pars snytax to write the full POW definition down is: `cryptonight_gpu:0x0000c000:0x00200000:0x001fffc0` This change make it very easy to integrate the new trend of variate the number of iteations or the scratchpad size without modifying the full code. --- xmrstak/backend/amd/amd_gpu/gpu.cpp | 18 +- xmrstak/backend/amd/amd_gpu/gpu.hpp | 8 +- xmrstak/backend/amd/autoAdjust.hpp | 15 +- xmrstak/backend/amd/minethd.cpp | 2 +- xmrstak/backend/amd/minethd.hpp | 2 +- xmrstak/backend/cpu/autoAdjust.hpp | 2 +- xmrstak/backend/cpu/autoAdjustHwloc.hpp | 2 +- xmrstak/backend/cpu/crypto/cn_gpu.hpp | 7 +- xmrstak/backend/cpu/crypto/cn_gpu_avx.cpp | 43 +- xmrstak/backend/cpu/crypto/cn_gpu_ssse3.cpp | 45 +- .../backend/cpu/crypto/cryptonight_aesni.h | 113 +++-- .../backend/cpu/crypto/cryptonight_common.cpp | 4 +- xmrstak/backend/cpu/minethd.cpp | 210 ++++----- xmrstak/backend/cpu/minethd.hpp | 6 +- xmrstak/backend/cryptonight.hpp | 421 ++++++------------ xmrstak/backend/nvidia/minethd.cpp | 2 +- xmrstak/backend/nvidia/minethd.hpp | 2 +- .../backend/nvidia/nvcc_code/cryptonight.hpp | 6 +- xmrstak/backend/nvidia/nvcc_code/cuda_core.cu | 137 +++--- .../nvidia/nvcc_code/cuda_cryptonight_gpu.hpp | 17 +- .../backend/nvidia/nvcc_code/cuda_extra.cu | 14 +- xmrstak/cli/cli-miner.cpp | 2 +- xmrstak/jconf.cpp | 53 +-- xmrstak/misc/coinDescription.hpp | 10 +- xmrstak/misc/executor.cpp | 11 +- xmrstak/net/jpsock.cpp | 17 +- xmrstak/net/jpsock.hpp | 2 +- xmrstak/net/msgstruct.hpp | 4 +- 28 files changed, 526 insertions(+), 649 deletions(-) diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index f80c37a..893c98a 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -330,7 +330,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ size_t scratchPadSize = 0; for(const auto algo : neededAlgorithms) { - scratchPadSize = std::max(scratchPadSize, cn_select_memory(algo)); + scratchPadSize = std::max(scratchPadSize, algo.Mem()); } size_t g_thd = ctx->rawIntensity; @@ -405,9 +405,9 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ for(const auto miner_algo : neededAlgorithms) { // scratchpad size for the selected mining algorithm - size_t hashMemSize = cn_select_memory(miner_algo); - int threadMemMask = cn_select_mask(miner_algo); - int hashIterations = cn_select_iter(miner_algo); + size_t hashMemSize = miner_algo.Mem(); + int threadMemMask = miner_algo.Mask(); + int hashIterations = miner_algo.Iter(); size_t mem_chunk_exp = 1u << ctx->memChunk; size_t strided_index = ctx->stridedIndex; @@ -438,7 +438,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ options += " -DMEM_CHUNK_EXPONENT=" + std::to_string(mem_chunk_exp) + "U"; options += " -DCOMP_MODE=" + std::to_string(needCompMode); options += " -DMEMORY=" + std::to_string(hashMemSize) + "LU"; - options += " -DALGO=" + std::to_string(miner_algo); + options += " -DALGO=" + std::to_string(miner_algo.Id()); options += " -DCN_UNROLL=" + std::to_string(ctx->unroll); /* AMD driver output is something like: `1445.5 (VM)` * and is mapped to `14` only. The value is only used for a compiler @@ -1001,10 +1001,10 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) return ERR_SUCCESS; } -size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, xmrstak_algo miner_algo) +size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, const xmrstak_algo& miner_algo) { - const auto & Kernels = ctx->Kernels[miner_algo]; + const auto & Kernels = ctx->Kernels[miner_algo.Id()]; cl_int ret; @@ -1289,9 +1289,9 @@ uint64_t interleaveAdjustDelay(GpuContext* ctx, const bool enableAutoAdjustment) return t0; } -size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo) +size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, const xmrstak_algo& miner_algo) { - const auto & Kernels = ctx->Kernels[miner_algo]; + const auto & Kernels = ctx->Kernels[miner_algo.Id()]; cl_int ret; cl_uint zero = 0; diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp index 5b95e98..16bc799 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.hpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp @@ -52,8 +52,8 @@ struct GpuContext cl_mem InputBuffer; cl_mem OutputBuffer; cl_mem ExtraBuffers[6]; - std::map<xmrstak_algo, cl_program> Program; - std::map<xmrstak_algo, std::array<cl_kernel,7>> Kernels; + std::map<xmrstak_algo_id, cl_program> Program; + std::map<xmrstak_algo_id, std::array<cl_kernel,7>> Kernels; size_t freeMem; size_t maxMemPerAlloc; int computeUnits; @@ -72,7 +72,7 @@ int getAMDPlatformIdx(); std::vector<GpuContext> getAMDDevices(int index); size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx); -size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, xmrstak_algo miner_algo); -size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo); +size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, const xmrstak_algo& miner_algo); +size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, const xmrstak_algo& miner_algo); uint64_t interleaveAdjustDelay(GpuContext* ctx, const bool enableAutoAdjustment = true); uint64_t updateTimings(GpuContext* ctx, const uint64_t t); diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index 7ca072c..f21a339 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -88,7 +88,7 @@ private: size_t hashMemSize = 0; for(const auto algo : neededAlgorithms) { - hashMemSize = std::max(hashMemSize, cn_select_memory(algo)); + hashMemSize = std::max(hashMemSize, algo.Mem()); } std::string conf; @@ -153,13 +153,12 @@ private: else if(useCryptonight_heavy) ctx.stridedIndex = 3; - // increase all intensity limits by two if scratchpad is only 1 MiB - if(hashMemSize <= CRYPTONIGHT_LITE_MEMORY) - maxThreads *= 2u; - - // increase all intensity limits by eight for turtle (*2u shadowed from lite) - if (hashMemSize <= CRYPTONIGHT_TURTLE_MEMORY) - maxThreads *= 4u; + if(hashMemSize < CN_MEMORY) + { + size_t factor = CN_MEMORY / hashMemSize; + // increase all intensity relative to the original scratchpad size + maxThreads *= factor; + } if(useCryptonight_gpu) { diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index b0f4e6e..1c9eb62 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -275,7 +275,7 @@ void minethd::work_main() *(uint32_t*)(bWorkBlob + 39) = results[i]; - hash_fun(bWorkBlob, oWork.iWorkSize, bResult, &cpu_ctx); + hash_fun(bWorkBlob, oWork.iWorkSize, bResult, &cpu_ctx, miner_algo); if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget) executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult, iThreadNo, miner_algo), oWork.iPoolId)); else diff --git a/xmrstak/backend/amd/minethd.hpp b/xmrstak/backend/amd/minethd.hpp index 74ab5fb..402d63c 100644 --- a/xmrstak/backend/amd/minethd.hpp +++ b/xmrstak/backend/amd/minethd.hpp @@ -24,7 +24,7 @@ public: static bool init_gpus(); private: - typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx**); + typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx**, const xmrstak_algo&); minethd(miner_work& pWork, size_t iNo, GpuContext* ctx, const jconf::thd_cfg cfg); diff --git a/xmrstak/backend/cpu/autoAdjust.hpp b/xmrstak/backend/cpu/autoAdjust.hpp index 91da7a6..b56afcc 100644 --- a/xmrstak/backend/cpu/autoAdjust.hpp +++ b/xmrstak/backend/cpu/autoAdjust.hpp @@ -33,7 +33,7 @@ public: size_t hashMemSize = 0; for(const auto algo : neededAlgorithms) { - hashMemSize = std::max(hashMemSize, cn_select_memory(algo)); + hashMemSize = std::max(hashMemSize, algo.Mem()); } const size_t hashMemSizeKB = hashMemSize / 1024u; diff --git a/xmrstak/backend/cpu/autoAdjustHwloc.hpp b/xmrstak/backend/cpu/autoAdjustHwloc.hpp index 39e80a3..585159c 100644 --- a/xmrstak/backend/cpu/autoAdjustHwloc.hpp +++ b/xmrstak/backend/cpu/autoAdjustHwloc.hpp @@ -32,7 +32,7 @@ public: for(const auto algo : neededAlgorithms) { - hashMemSize = std::max(hashMemSize, cn_select_memory(algo)); + hashMemSize = std::max(hashMemSize, algo.Mem()); } halfHashMemSize = hashMemSize / 2u; } diff --git a/xmrstak/backend/cpu/crypto/cn_gpu.hpp b/xmrstak/backend/cpu/crypto/cn_gpu.hpp index 4a7697b..5844d38 100644 --- a/xmrstak/backend/cpu/crypto/cn_gpu.hpp +++ b/xmrstak/backend/cpu/crypto/cn_gpu.hpp @@ -1,5 +1,6 @@ #pragma once +#include "xmrstak/backend/cryptonight.hpp" #include <stdint.h> #if defined(_WIN32) || defined(_WIN64) @@ -36,8 +37,6 @@ inline bool cngpu_check_avx2() return (cpu_info[1] & (1 << 5)) != 0; } -template<size_t ITER, uint32_t MASK> -void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad); +void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad, const xmrstak_algo& algo); -template<size_t ITER, uint32_t MASK> -void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad); +void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad, const xmrstak_algo& algo); diff --git a/xmrstak/backend/cpu/crypto/cn_gpu_avx.cpp b/xmrstak/backend/cpu/crypto/cn_gpu_avx.cpp index e46705f..8b4aefe 100644 --- a/xmrstak/backend/cpu/crypto/cn_gpu_avx.cpp +++ b/xmrstak/backend/cpu/crypto/cn_gpu_avx.cpp @@ -9,11 +9,11 @@ inline void prep_dv_avx(__m256i* idx, __m256i& v, __m256& n01) n01 = _mm256_cvtepi32_ps(v); } -inline __m256 fma_break(const __m256& x) -{ - // Break the dependency chain by setitng the exp to ?????01 - __m256 xx = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0xFEFFFFFF)), x); - return _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x00800000)), xx); +inline __m256 fma_break(const __m256& x) +{ + // Break the dependency chain by setitng the exp to ?????01 + __m256 xx = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0xFEFFFFFF)), x); + return _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x00800000)), xx); } // 14 @@ -60,7 +60,7 @@ inline void round_compute(const __m256& n0, const __m256& n1, const __m256& n2, // 112×4 = 448 template <bool add> -inline __m256i double_comupte(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3, +inline __m256i double_comupte(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3, float lcnt, float hcnt, const __m256& rnd_c, __m256& sum) { __m256 c = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_set1_ps(lcnt)), _mm_set1_ps(hcnt), 1); @@ -85,7 +85,7 @@ inline __m256i double_comupte(const __m256& n0, const __m256& n1, const __m256& } template <size_t rot> -inline void double_comupte_wrap(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3, +inline void double_comupte_wrap(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3, float lcnt, float hcnt, const __m256& rnd_c, __m256& sum, __m256i& out) { __m256i r = double_comupte<rot % 2 != 0>(n0, n1, n2, n3, lcnt, hcnt, rnd_c, sum); @@ -95,15 +95,18 @@ inline void double_comupte_wrap(const __m256& n0, const __m256& n1, const __m256 out = _mm256_xor_si256(out, r); } -template<uint32_t MASK> -inline __m256i* scratchpad_ptr(uint8_t* lpad, uint32_t idx, size_t n) { return reinterpret_cast<__m256i*>(lpad + (idx & MASK) + n*16); } -template<size_t ITER, uint32_t MASK> -void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad) +inline __m256i* scratchpad_ptr(uint8_t* lpad, uint32_t idx, size_t n, const uint32_t mask) { return reinterpret_cast<__m256i*>(lpad + (idx & mask) + n*16); } + + +void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad, const xmrstak_algo& algo) { + const uint32_t ITER = algo.Iter(); + const uint32_t mask = algo.Mask(); + uint32_t s = reinterpret_cast<const uint32_t*>(spad)[0] >> 8; - __m256i* idx0 = scratchpad_ptr<MASK>(lpad, s, 0); - __m256i* idx2 = scratchpad_ptr<MASK>(lpad, s, 2); + __m256i* idx0 = scratchpad_ptr(lpad, s, 0, mask); + __m256i* idx2 = scratchpad_ptr(lpad, s, 2, mask); __m256 sum0 = _mm256_setzero_ps(); for(size_t i = 0; i < ITER; i++) @@ -116,13 +119,13 @@ void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad) __m256 d01, d23; prep_dv_avx(idx0, v01, n01); prep_dv_avx(idx2, v23, n23); - + __m256i out, out2; __m256 n10, n22, n33; n10 = _mm256_permute2f128_ps(n01, n01, 0x01); n22 = _mm256_permute2f128_ps(n23, n23, 0x00); n33 = _mm256_permute2f128_ps(n23, n23, 0x11); - + out = _mm256_setzero_si256(); double_comupte_wrap<0>(n01, n10, n22, n33, 1.3437500f, 1.4296875f, rc, suma, out); double_comupte_wrap<1>(n01, n22, n33, n10, 1.2812500f, 1.3984375f, rc, suma, out); @@ -131,7 +134,7 @@ void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad) _mm256_store_si256(idx0, _mm256_xor_si256(v01, out)); sum0 = _mm256_add_ps(suma, sumb); out2 = out; - + __m256 n11, n02, n30; n11 = _mm256_permute2f128_ps(n01, n01, 0x11); n02 = _mm256_permute2f128_ps(n01, n23, 0x20); @@ -156,7 +159,7 @@ void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad) __m128 sum = _mm256_castps256_ps128(sum0); sum = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)), sum); // take abs(va) by masking the float sign bit - // vs range 0 - 64 + // vs range 0 - 64 __m128i v0 = _mm_cvttps_epi32(_mm_mul_ps(sum, _mm_set1_ps(16777216.0f))); v0 = _mm_xor_si128(v0, _mm256_castsi256_si128(out2)); __m128i v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 2, 3)); @@ -168,9 +171,7 @@ void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad) sum = _mm_div_ps(sum, _mm_set1_ps(64.0f)); sum0 = _mm256_insertf128_ps(_mm256_castps128_ps256(sum), sum, 1); uint32_t n = _mm_cvtsi128_si32(v0); - idx0 = scratchpad_ptr<MASK>(lpad, n, 0); - idx2 = scratchpad_ptr<MASK>(lpad, n, 2); + idx0 = scratchpad_ptr(lpad, n, 0, mask); + idx2 = scratchpad_ptr(lpad, n, 2, mask); } } - -template void cn_gpu_inner_avx<CRYPTONIGHT_GPU_ITER, CRYPTONIGHT_GPU_MASK>(const uint8_t* spad, uint8_t* lpad); diff --git a/xmrstak/backend/cpu/crypto/cn_gpu_ssse3.cpp b/xmrstak/backend/cpu/crypto/cn_gpu_ssse3.cpp index bde3416..c8627d8 100644 --- a/xmrstak/backend/cpu/crypto/cn_gpu_ssse3.cpp +++ b/xmrstak/backend/cpu/crypto/cn_gpu_ssse3.cpp @@ -9,11 +9,11 @@ inline void prep_dv(__m128i* idx, __m128i& v, __m128& n) n = _mm_cvtepi32_ps(v); } -inline __m128 fma_break(__m128 x) -{ - // Break the dependency chain by setitng the exp to ?????01 - x = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0xFEFFFFFF)), x); - return _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x00800000)), x); +inline __m128 fma_break(__m128 x) +{ + // Break the dependency chain by setitng the exp to ?????01 + x = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0xFEFFFFFF)), x); + return _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x00800000)), x); } // 14 @@ -94,25 +94,26 @@ inline void single_comupte_wrap(__m128 n0, __m128 n1, __m128 n2, __m128 n3, flo out = _mm_xor_si128(out, r); } -template<uint32_t MASK> -inline __m128i* scratchpad_ptr(uint8_t* lpad, uint32_t idx, size_t n) { return reinterpret_cast<__m128i*>(lpad + (idx & MASK) + n*16); } +inline __m128i* scratchpad_ptr(uint8_t* lpad, uint32_t idx, size_t n, const uint32_t mask) { return reinterpret_cast<__m128i*>(lpad + (idx & mask) + n*16); } -template<size_t ITER, uint32_t MASK> -void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad) +void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad, const xmrstak_algo& algo) { + const uint32_t ITER = algo.Iter(); + const uint32_t mask = algo.Mask(); + uint32_t s = reinterpret_cast<const uint32_t*>(spad)[0] >> 8; - __m128i* idx0 = scratchpad_ptr<MASK>(lpad, s, 0); - __m128i* idx1 = scratchpad_ptr<MASK>(lpad, s, 1); - __m128i* idx2 = scratchpad_ptr<MASK>(lpad, s, 2); - __m128i* idx3 = scratchpad_ptr<MASK>(lpad, s, 3); + __m128i* idx0 = scratchpad_ptr(lpad, s, 0, mask); + __m128i* idx1 = scratchpad_ptr(lpad, s, 1, mask); + __m128i* idx2 = scratchpad_ptr(lpad, s, 2, mask); + __m128i* idx3 = scratchpad_ptr(lpad, s, 3, mask); __m128 sum0 = _mm_setzero_ps(); - + for(size_t i = 0; i < ITER; i++) { __m128 n0, n1, n2, n3; __m128i v0, v1, v2, v3; __m128 suma, sumb, sum1, sum2, sum3; - + prep_dv(idx0, v0, n0); prep_dv(idx1, v1, n1); prep_dv(idx2, v2, n2); @@ -128,7 +129,7 @@ void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad) sum0 = _mm_add_ps(suma, sumb); _mm_store_si128(idx0, _mm_xor_si128(v0, out)); out2 = out; - + out = _mm_setzero_si128(); single_comupte_wrap<0>(n1, n0, n2, n3, 1.4296875f, rc, suma, out); single_comupte_wrap<1>(n1, n2, n3, n0, 1.3984375f, rc, suma, out); @@ -160,7 +161,7 @@ void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad) sum0 = _mm_add_ps(sum0, sum2); sum0 = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)), sum0); // take abs(va) by masking the float sign bit - // vs range 0 - 64 + // vs range 0 - 64 n0 = _mm_mul_ps(sum0, _mm_set1_ps(16777216.0f)); v0 = _mm_cvttps_epi32(n0); v0 = _mm_xor_si128(v0, out2); @@ -172,11 +173,9 @@ void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad) // vs is now between 0 and 1 sum0 = _mm_div_ps(sum0, _mm_set1_ps(64.0f)); uint32_t n = _mm_cvtsi128_si32(v0); - idx0 = scratchpad_ptr<MASK>(lpad, n, 0); - idx1 = scratchpad_ptr<MASK>(lpad, n, 1); - idx2 = scratchpad_ptr<MASK>(lpad, n, 2); - idx3 = scratchpad_ptr<MASK>(lpad, n, 3); + idx0 = scratchpad_ptr(lpad, n, 0, mask); + idx1 = scratchpad_ptr(lpad, n, 1, mask); + idx2 = scratchpad_ptr(lpad, n, 2, mask); + idx3 = scratchpad_ptr(lpad, n, 3, mask); } } - -template void cn_gpu_inner_ssse3<CRYPTONIGHT_GPU_ITER, CRYPTONIGHT_GPU_MASK>(const uint8_t* spad, uint8_t* lpad); diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index c75eff8..d0c574c 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -165,11 +165,11 @@ inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3 x7 = _mm_xor_si128(x7, tmp0); } -template<size_t MEM, bool SOFT_AES, bool PREFETCH, xmrstak_algo ALGO> -void cn_explode_scratchpad(const __m128i* input, __m128i* output) +template<bool SOFT_AES, bool PREFETCH, xmrstak_algo_id ALGO> +void cn_explode_scratchpad(const __m128i* input, __m128i* output, const xmrstak_algo& algo) { constexpr bool HEAVY_MIX = ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2 || ALGO == cryptonight_superfast; - + // This is more than we have registers, compiler will assign 2 keys on the stack __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7; __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; @@ -219,6 +219,7 @@ void cn_explode_scratchpad(const __m128i* input, __m128i* output) } } + const size_t MEM = algo.Mem(); for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) { if(SOFT_AES) @@ -266,13 +267,14 @@ void cn_explode_scratchpad(const __m128i* input, __m128i* output) } } -template<size_t MEM, bool PREFETCH, xmrstak_algo ALGO> -void cn_explode_scratchpad_gpu(const uint8_t* input, uint8_t* output) +template<bool PREFETCH, xmrstak_algo_id ALGO> +void cn_explode_scratchpad_gpu(const uint8_t* input, uint8_t* output, const xmrstak_algo& algo) { constexpr size_t hash_size = 200; // 25x8 bytes alignas(128) uint64_t hash[25]; + const size_t mem = algo.Mem(); - for (uint64_t i = 0; i < MEM / 512; i++) + for (uint64_t i = 0; i < mem / 512; i++) { memcpy(hash, input, hash_size); hash[0] ^= i; @@ -288,7 +290,7 @@ void cn_explode_scratchpad_gpu(const uint8_t* input, uint8_t* output) keccakf(hash, 24); memcpy(output, hash, 176); output+=176; - + if(PREFETCH) { _mm_prefetch((const char*)output - 512, _MM_HINT_T2); @@ -299,10 +301,10 @@ void cn_explode_scratchpad_gpu(const uint8_t* input, uint8_t* output) } } -template<size_t MEM, bool SOFT_AES, bool PREFETCH, xmrstak_algo ALGO> -void cn_implode_scratchpad(const __m128i* input, __m128i* output) +template<bool SOFT_AES, bool PREFETCH, xmrstak_algo_id ALGO> +void cn_implode_scratchpad(const __m128i* input, __m128i* output, const xmrstak_algo& algo) { - constexpr bool HEAVY_MIX = ALGO == cryptonight_heavy || ALGO == cryptonight_haven || + constexpr bool HEAVY_MIX = ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2 || ALGO == cryptonight_superfast || ALGO == cryptonight_gpu; // This is more than we have registers, compiler will assign 2 keys on the stack @@ -320,6 +322,7 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output) xout6 = _mm_load_si128(output + 10); xout7 = _mm_load_si128(output + 11); + const size_t MEM = algo.Mem(); for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) { if(PREFETCH) @@ -504,7 +507,7 @@ inline __m128i aes_round_bittube2(const __m128i& val, const __m128i& key) return _mm_load_si128((__m128i*)k); } -template<xmrstak_algo ALGO> +template<xmrstak_algo_id ALGO> inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp) { mem_out[0] = _mm_cvtsi128_si64(tmp); @@ -650,7 +653,7 @@ inline void set_float_rounding_mode() monero_const ^= *(reinterpret_cast<const uint64_t*>(ctx[n]->hash_state) + 24); \ } \ /* Optim - 99% time boundary */ \ - cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[n]->hash_state, (__m128i*)ctx[n]->long_state); \ + cn_explode_scratchpad<SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[n]->hash_state, (__m128i*)ctx[n]->long_state, algo); \ \ __m128i ax0; \ uint64_t idx0; \ @@ -768,7 +771,7 @@ inline void set_float_rounding_mode() #define CN_FINALIZE(n) \ /* Optim - 90% time boundary */ \ - cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[n]->long_state, (__m128i*)ctx[n]->hash_state); \ + cn_implode_scratchpad<SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[n]->long_state, (__m128i*)ctx[n]->hash_state, algo); \ /* Optim - 99% time boundary */ \ keccakf((uint64_t*)ctx[n]->hash_state, 24); \ extra_hashes[ctx[n]->hash_state[0] & 3](ctx[n]->hash_state, 200, (char*)output + 32 * n) @@ -837,12 +840,12 @@ struct Cryptonight_hash<1> { static constexpr size_t N = 1; - template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH> - static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx) + template<xmrstak_algo_id ALGO, bool SOFT_AES, bool PREFETCH> + static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx, const xmrstak_algo& algo) { - constexpr size_t MASK = cn_select_mask<ALGO>(); - constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); - constexpr size_t MEM = cn_select_memory<ALGO>(); + const uint32_t MASK = algo.Mask(); + const uint32_t ITERATIONS = algo.Iter(); + const size_t MEM = algo.Mem(); CN_INIT_SINGLE; REPEAT_1(9, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm); @@ -866,12 +869,12 @@ struct Cryptonight_hash<2> { static constexpr size_t N = 2; - template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH> - static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx) + template<xmrstak_algo_id ALGO, bool SOFT_AES, bool PREFETCH> + static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx, const xmrstak_algo& algo) { - constexpr size_t MASK = cn_select_mask<ALGO>(); - constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); - constexpr size_t MEM = cn_select_memory<ALGO>(); + const uint32_t MASK = algo.Mask(); + const uint32_t ITERATIONS = algo.Iter(); + const size_t MEM = algo.Mem(); CN_INIT_SINGLE; REPEAT_2(9, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm); @@ -895,12 +898,12 @@ struct Cryptonight_hash<3> { static constexpr size_t N = 3; - template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH> - static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx) + template<xmrstak_algo_id ALGO, bool SOFT_AES, bool PREFETCH> + static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx, const xmrstak_algo& algo) { - constexpr size_t MASK = cn_select_mask<ALGO>(); - constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); - constexpr size_t MEM = cn_select_memory<ALGO>(); + const uint32_t MASK = algo.Mask(); + const uint32_t ITERATIONS = algo.Iter(); + const size_t MEM = algo.Mem(); CN_INIT_SINGLE; REPEAT_3(9, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm); @@ -924,12 +927,12 @@ struct Cryptonight_hash<4> { static constexpr size_t N = 4; - template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH> - static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx) + template<xmrstak_algo_id ALGO, bool SOFT_AES, bool PREFETCH> + static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx, const xmrstak_algo& algo) { - constexpr size_t MASK = cn_select_mask<ALGO>(); - constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); - constexpr size_t MEM = cn_select_memory<ALGO>(); + const uint32_t MASK = algo.Mask(); + const uint32_t ITERATIONS = algo.Iter(); + const size_t MEM = algo.Mem(); CN_INIT_SINGLE; REPEAT_4(9, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm); @@ -953,12 +956,12 @@ struct Cryptonight_hash<5> { static constexpr size_t N = 5; - template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH> - static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx) + template<xmrstak_algo_id ALGO, bool SOFT_AES, bool PREFETCH> + static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx, const xmrstak_algo& algo) { - constexpr size_t MASK = cn_select_mask<ALGO>(); - constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); - constexpr size_t MEM = cn_select_memory<ALGO>(); + const uint32_t MASK = algo.Mask(); + const uint32_t ITERATIONS = algo.Iter(); + const size_t MEM = algo.Mem(); CN_INIT_SINGLE; REPEAT_5(9, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm); @@ -990,20 +993,19 @@ struct Cryptonight_hash_asm<1, asm_version> { static constexpr size_t N = 1; - template<xmrstak_algo ALGO> - static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx) + template<xmrstak_algo_id ALGO> + static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx, const xmrstak_algo& algo) { - constexpr size_t MEM = cn_select_memory<ALGO>(); keccak((const uint8_t *)input, len, ctx[0]->hash_state, 200); - cn_explode_scratchpad<MEM, false, false, ALGO>((__m128i*)ctx[0]->hash_state, (__m128i*)ctx[0]->long_state); + cn_explode_scratchpad<false, false, ALGO>((__m128i*)ctx[0]->hash_state, (__m128i*)ctx[0]->long_state, algo); if(asm_version == 0) cryptonight_v8_mainloop_ivybridge_asm(ctx[0]); else if(asm_version == 1) cryptonight_v8_mainloop_ryzen_asm(ctx[0]); - cn_implode_scratchpad<MEM, false, false, ALGO>((__m128i*)ctx[0]->long_state, (__m128i*)ctx[0]->hash_state); + cn_implode_scratchpad<false, false, ALGO>((__m128i*)ctx[0]->long_state, (__m128i*)ctx[0]->hash_state, algo); keccakf((uint64_t*)ctx[0]->hash_state, 24); extra_hashes[ctx[0]->hash_state[0] & 3](ctx[0]->hash_state, 200, (char*)output); } @@ -1015,16 +1017,16 @@ struct Cryptonight_hash_asm<2, 0> { static constexpr size_t N = 2; - template<xmrstak_algo ALGO> - static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx) + template<xmrstak_algo_id ALGO> + static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx, const xmrstak_algo& algo) { - constexpr size_t MEM = cn_select_memory<ALGO>(); + const size_t MEM = algo.Mem(); for(size_t i = 0; i < N; ++i) { keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200); /* Optim - 99% time boundary */ - cn_explode_scratchpad<MEM, false, false, ALGO>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state); + cn_explode_scratchpad<false, false, ALGO>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state, algo); } cryptonight_v8_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); @@ -1032,7 +1034,7 @@ struct Cryptonight_hash_asm<2, 0> for(size_t i = 0; i < N; ++i) { /* Optim - 90% time boundary */ - cn_implode_scratchpad<MEM, false, false, ALGO>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state); + cn_implode_scratchpad<false, false, ALGO>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state, algo); /* Optim - 99% time boundary */ keccakf((uint64_t*)ctx[i]->hash_state, 24); extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i); @@ -1044,22 +1046,19 @@ struct Cryptonight_hash_gpu { static constexpr size_t N = 1; - template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH> - static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx) + template<xmrstak_algo_id ALGO, bool SOFT_AES, bool PREFETCH> + static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx, const xmrstak_algo& algo) { - constexpr size_t MASK = cn_select_mask<ALGO>(); - constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); - constexpr size_t MEM = cn_select_memory<ALGO>(); keccak((const uint8_t *)input, len, ctx[0]->hash_state, 200); - cn_explode_scratchpad_gpu<MEM, PREFETCH, ALGO>(ctx[0]->hash_state, ctx[0]->long_state); + cn_explode_scratchpad_gpu<PREFETCH, ALGO>(ctx[0]->hash_state, ctx[0]->long_state, algo); if(cngpu_check_avx2()) - cn_gpu_inner_avx<ITERATIONS, MASK>(ctx[0]->hash_state, ctx[0]->long_state); + cn_gpu_inner_avx(ctx[0]->hash_state, ctx[0]->long_state, algo); else - cn_gpu_inner_ssse3<ITERATIONS, MASK>(ctx[0]->hash_state, ctx[0]->long_state); + cn_gpu_inner_ssse3(ctx[0]->hash_state, ctx[0]->long_state, algo); - cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[0]->long_state, (__m128i*)ctx[0]->hash_state); + cn_implode_scratchpad<SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[0]->long_state, (__m128i*)ctx[0]->hash_state, algo); keccakf((uint64_t*)ctx[0]->hash_state, 24); memcpy(output, ctx[0]->hash_state, 32); } diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp index ee1ff23..a065abe 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp +++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp @@ -208,7 +208,7 @@ cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, al size_t hashMemSize = 0; for(const auto algo : neededAlgorithms) { - hashMemSize = std::max(hashMemSize, cn_select_memory(algo)); + hashMemSize = std::max(hashMemSize, algo.Mem()); } cryptonight_ctx* ptr = (cryptonight_ctx*)_mm_malloc(sizeof(cryptonight_ctx), 4096); @@ -292,7 +292,7 @@ void cryptonight_free_ctx(cryptonight_ctx* ctx) size_t hashMemSize = 0; for(const auto algo : neededAlgorithms) { - hashMemSize = std::max(hashMemSize, cn_select_memory(algo)); + hashMemSize = std::max(hashMemSize, algo.Mem()); } if(ctx->ctx_info[0] != 0) diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index e1af701..8246610 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -239,190 +239,187 @@ bool minethd::self_test() cn_hash_fun hashf; cn_hash_fun hashf_multi; - if(xmrstak_algo::invalid_algo == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(0).GetMiningAlgoRoot() || - xmrstak_algo::invalid_algo == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot()) - { - printer::inst()->print_msg(L0, "Root algorithm is not allowed to be invalid"); - return false; - } - auto neededAlgorithms = ::jconf::inst()->GetCurrentCoinSelection().GetAllAlgorithms(); for(const auto algo : neededAlgorithms) { - if(algo == cryptonight) + if(algo == POW(cryptonight)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight); - hashf("This is a test", 14, out, ctx); + std::cout<<algo.Name()<< " test cn" <<std::endl; + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf("This is a test", 14, out, ctx, algo); bResult = bResult && memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight); - hashf("This is a test", 14, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); + hashf("This is a test", 14, out, ctx, algo); bResult = bResult && memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0; - hashf_multi = func_multi_selector<2>(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight); - hashf_multi("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx); + hashf_multi = func_multi_selector<2>(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf_multi("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx, algo); bResult = bResult && memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59" "\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0; - hashf_multi = func_multi_selector<2>(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight); - hashf_multi("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx); + hashf_multi = func_multi_selector<2>(::jconf::inst()->HaveHardwareAes(), true, algo); + hashf_multi("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx, algo); bResult = bResult && memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59" "\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0; - hashf_multi = func_multi_selector<3>(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight); - hashf_multi("This is a testThis is a testThis is a test", 14, out, ctx); + hashf_multi = func_multi_selector<3>(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf_multi("This is a testThis is a testThis is a test", 14, out, ctx, algo); bResult = bResult && memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 96) == 0; - hashf_multi = func_multi_selector<4>(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight); - hashf_multi("This is a testThis is a testThis is a testThis is a test", 14, out, ctx); + hashf_multi = func_multi_selector<4>(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf_multi("This is a testThis is a testThis is a testThis is a test", 14, out, ctx, algo); bResult = bResult && memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 128) == 0; - hashf_multi = func_multi_selector<5>(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight); - hashf_multi("This is a testThis is a testThis is a testThis is a testThis is a test", 14, out, ctx); + hashf_multi = func_multi_selector<5>(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf_multi("This is a testThis is a testThis is a testThis is a testThis is a test", 14, out, ctx, algo); bResult = bResult && memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 160) == 0; } - else if(algo == cryptonight_lite) + else if(algo == POW(cryptonight_lite)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_lite); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\x5a\x24\xa0\x29\xde\x1c\x39\x3f\x3d\x52\x7a\x2f\x9b\x39\xdc\x3d\xb3\xbc\x87\x11\x8b\x84\x52\x9b\x9f\x0\x88\x49\x25\x4b\x5\xce", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_lite); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\x5a\x24\xa0\x29\xde\x1c\x39\x3f\x3d\x52\x7a\x2f\x9b\x39\xdc\x3d\xb3\xbc\x87\x11\x8b\x84\x52\x9b\x9f\x0\x88\x49\x25\x4b\x5\xce", 32) == 0; } - else if(algo == cryptonight_monero) + else if(algo == POW(cryptonight_monero)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_monero); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\x1\x57\xc5\xee\x18\x8b\xbe\xc8\x97\x52\x85\xa3\x6\x4e\xe9\x20\x65\x21\x76\x72\xfd\x69\xa1\xae\xbd\x7\x66\xc7\xb5\x6e\xe0\xbd", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\x1\x57\xc5\xee\x18\x8b\xbe\xc8\x97\x52\x85\xa3\x6\x4e\xe9\x20\x65\x21\x76\x72\xfd\x69\xa1\xae\xbd\x7\x66\xc7\xb5\x6e\xe0\xbd", 32) == 0; } - else if(algo == cryptonight_monero_v8) + else if(algo == POW(cryptonight_monero_v8)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_monero_v8); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = memcmp(out, "\x35\x3f\xdc\x06\x8f\xd4\x7b\x03\xc0\x4b\x94\x31\xe0\x05\xe0\x0b\x68\xc2\x16\x8a\x3c\xc7\x33\x5c\x8b\x9b\x30\x81\x56\x59\x1a\x4f", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero_v8); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult &= memcmp(out, "\x35\x3f\xdc\x06\x8f\xd4\x7b\x03\xc0\x4b\x94\x31\xe0\x05\xe0\x0b\x68\xc2\x16\x8a\x3c\xc7\x33\x5c\x8b\x9b\x30\x81\x56\x59\x1a\x4f", 32) == 0; } - else if(algo == cryptonight_aeon) + else if(algo == POW(cryptonight_aeon)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_aeon); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xfc\xa1\x7d\x44\x37\x70\x9b\x4a\x3b\xd7\x1e\xf3\xed\x21\xb4\x17\xca\x93\xdc\x86\x79\xce\x81\xdf\xd3\xcb\xdd\xa\x22\xd7\x58\xba", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_aeon); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xfc\xa1\x7d\x44\x37\x70\x9b\x4a\x3b\xd7\x1e\xf3\xed\x21\xb4\x17\xca\x93\xdc\x86\x79\xce\x81\xdf\xd3\xcb\xdd\xa\x22\xd7\x58\xba", 32) == 0; } - else if(algo == cryptonight_ipbc) + else if(algo == POW(cryptonight_ipbc)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_ipbc); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xbc\xe7\x48\xaf\xc5\x31\xff\xc9\x33\x7f\xcf\x51\x1b\xe3\x20\xa3\xaa\x8d\x4\x55\xf9\x14\x2a\x61\xe8\x38\xdf\xdc\x3b\x28\x3e\x0xb0", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_ipbc); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xbc\xe7\x48\xaf\xc5\x31\xff\xc9\x33\x7f\xcf\x51\x1b\xe3\x20\xa3\xaa\x8d\x4\x55\xf9\x14\x2a\x61\xe8\x38\xdf\xdc\x3b\x28\x3e\x0", 32) == 0; } - else if(algo == cryptonight_stellite) + else if(algo == POW(cryptonight_stellite)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_stellite); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xb9\x9d\x6c\xee\x50\x3c\x6f\xa6\x3f\x30\x69\x24\x4a\x0\x9f\xe4\xd4\x69\x3f\x68\x92\xa4\x5c\xc2\x51\xae\x46\x87\x7c\x6b\x98\xae", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_stellite); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xb9\x9d\x6c\xee\x50\x3c\x6f\xa6\x3f\x30\x69\x24\x4a\x0\x9f\xe4\xd4\x69\x3f\x68\x92\xa4\x5c\xc2\x51\xae\x46\x87\x7c\x6b\x98\xae", 32) == 0; } - else if(algo == cryptonight_masari) + else if(algo == POW(cryptonight_masari)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_masari); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xbf\x5f\xd\xf3\x5a\x65\x7c\x89\xb0\x41\xcf\xf0\xd\x46\x6a\xb6\x30\xf9\x77\x7f\xd9\xc6\x3\xd7\x3b\xd8\xf1\xb5\x4b\x49\xed\x28", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_masari); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xbf\x5f\xd\xf3\x5a\x65\x7c\x89\xb0\x41\xcf\xf0\xd\x46\x6a\xb6\x30\xf9\x77\x7f\xd9\xc6\x3\xd7\x3b\xd8\xf1\xb5\x4b\x49\xed\x28", 32) == 0; } - else if(algo == cryptonight_heavy) + else if(algo == POW(cryptonight_heavy)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_heavy); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xf9\x44\x97\xce\xb4\xf0\xd9\x84\xb\x9b\xfc\x45\x94\x74\x55\x25\xcf\x26\x83\x16\x4f\xc\xf8\x2d\xf5\xf\x25\xff\x45\x28\x2e\x85", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_heavy); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xf9\x44\x97\xce\xb4\xf0\xd9\x84\xb\x9b\xfc\x45\x94\x74\x55\x25\xcf\x26\x83\x16\x4f\xc\xf8\x2d\xf5\xf\x25\xff\x45\x28\x2e\x85", 32) == 0; } - else if(algo == cryptonight_haven) + else if(algo == POW(cryptonight_haven)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_haven); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xc7\xd4\x52\x9\x2b\x48\xa5\xaf\xae\x11\xaf\x40\x9a\x87\xe5\x88\xf0\x29\x35\xa3\x68\xd\xe3\x6b\xce\x43\xf6\xc8\xdf\xd3\xe3\x9", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_haven); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xc7\xd4\x52\x9\x2b\x48\xa5\xaf\xae\x11\xaf\x40\x9a\x87\xe5\x88\xf0\x29\x35\xa3\x68\xd\xe3\x6b\xce\x43\xf6\xc8\xdf\xd3\xe3\x9", 32) == 0; } - else if(algo == cryptonight_bittube2) + else if(algo == POW(cryptonight_bittube2)) { unsigned char out[32 * MAX_N]; cn_hash_fun hashf; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_bittube2); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); - hashf("\x38\x27\x4c\x97\xc4\x5a\x17\x2c\xfc\x97\x67\x98\x70\x42\x2e\x3a\x1a\xb0\x78\x49\x60\xc6\x05\x14\xd8\x16\x27\x14\x15\xc3\x06\xee\x3a\x3e\xd1\xa7\x7e\x31\xf6\xa8\x85\xc3\xcb\xff\x01\x02\x03\x04", 48, out, ctx); + hashf("\x38\x27\x4c\x97\xc4\x5a\x17\x2c\xfc\x97\x67\x98\x70\x42\x2e\x3a\x1a\xb0\x78\x49\x60\xc6\x05\x14\xd8\x16\x27\x14\x15\xc3\x06\xee\x3a\x3e\xd1\xa7\x7e\x31\xf6\xa8\x85\xc3\xcb\xff\x01\x02\x03\x04", 48, out, ctx, algo); bResult = bResult && memcmp(out, "\x18\x2c\x30\x41\x93\x1a\x14\x73\xc6\xbf\x7e\x77\xfe\xb5\x17\x9b\xa8\xbe\xa9\x68\xba\x9e\xe1\xe8\x24\x1a\x12\x7a\xac\x81\xb4\x24", 32) == 0; - hashf("\x04\x04\xb4\x94\xce\xd9\x05\x18\xe7\x25\x5d\x01\x28\x63\xde\x8a\x4d\x27\x72\xb1\xff\x78\x8c\xd0\x56\x20\x38\x98\x3e\xd6\x8c\x94\xea\x00\xfe\x43\x66\x68\x83\x00\x00\x00\x00\x18\x7c\x2e\x0f\x66\xf5\x6b\xb9\xef\x67\xed\x35\x14\x5c\x69\xd4\x69\x0d\x1f\x98\x22\x44\x01\x2b\xea\x69\x6e\xe8\xb3\x3c\x42\x12\x01", 76, out, ctx); + hashf("\x04\x04\xb4\x94\xce\xd9\x05\x18\xe7\x25\x5d\x01\x28\x63\xde\x8a\x4d\x27\x72\xb1\xff\x78\x8c\xd0\x56\x20\x38\x98\x3e\xd6\x8c\x94\xea\x00\xfe\x43\x66\x68\x83\x00\x00\x00\x00\x18\x7c\x2e\x0f\x66\xf5\x6b\xb9\xef\x67\xed\x35\x14\x5c\x69\xd4\x69\x0d\x1f\x98\x22\x44\x01\x2b\xea\x69\x6e\xe8\xb3\x3c\x42\x12\x01", 76, out, ctx, algo); bResult = bResult && memcmp(out, "\x7f\xbe\xb9\x92\x76\x87\x5a\x3c\x43\xc2\xbe\x5a\x73\x36\x06\xb5\xdc\x79\xcc\x9c\xf3\x7c\x43\x3e\xb4\x18\x56\x17\xfb\x9b\xc9\x36", 32) == 0; - hashf("\x85\x19\xe0\x39\x17\x2b\x0d\x70\xe5\xca\x7b\x33\x83\xd6\xb3\x16\x73\x15\xa4\x22\x74\x7b\x73\xf0\x19\xcf\x95\x28\xf0\xfd\xe3\x41\xfd\x0f\x2a\x63\x03\x0b\xa6\x45\x05\x25\xcf\x6d\xe3\x18\x37\x66\x9a\xf6\xf1\xdf\x81\x31\xfa\xf5\x0a\xaa\xb8\xd3\xa7\x40\x55\x89", 64, out, ctx); + hashf("\x85\x19\xe0\x39\x17\x2b\x0d\x70\xe5\xca\x7b\x33\x83\xd6\xb3\x16\x73\x15\xa4\x22\x74\x7b\x73\xf0\x19\xcf\x95\x28\xf0\xfd\xe3\x41\xfd\x0f\x2a\x63\x03\x0b\xa6\x45\x05\x25\xcf\x6d\xe3\x18\x37\x66\x9a\xf6\xf1\xdf\x81\x31\xfa\xf5\x0a\xaa\xb8\xd3\xa7\x40\x55\x89", 64, out, ctx, algo); bResult = bResult && memcmp(out, "\x90\xdc\x65\x53\x8d\xb0\x00\xea\xa2\x52\xcd\xd4\x1c\x17\x7a\x64\xfe\xff\x95\x36\xe7\x71\x68\x35\xd4\xcf\x5c\x73\x56\xb1\x2f\xcd", 32) == 0; } - else if(algo == cryptonight_superfast) + else if(algo == POW(cryptonight_superfast)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_superfast); - hashf("\x03\x05\xa0\xdb\xd6\xbf\x05\xcf\x16\xe5\x03\xf3\xa6\x6f\x78\x00\x7c\xbf\x34\x14\x43\x32\xec\xbf\xc2\x2e\xd9\x5c\x87\x00\x38\x3b\x30\x9a\xce\x19\x23\xa0\x96\x4b\x00\x00\x00\x08\xba\x93\x9a\x62\x72\x4c\x0d\x75\x81\xfc\xe5\x76\x1e\x9d\x8a\x0e\x6a\x1c\x3f\x92\x4f\xdd\x84\x93\xd1\x11\x56\x49\xc0\x5e\xb6\x01", 76, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf("\x03\x05\xa0\xdb\xd6\xbf\x05\xcf\x16\xe5\x03\xf3\xa6\x6f\x78\x00\x7c\xbf\x34\x14\x43\x32\xec\xbf\xc2\x2e\xd9\x5c\x87\x00\x38\x3b\x30\x9a\xce\x19\x23\xa0\x96\x4b\x00\x00\x00\x08\xba\x93\x9a\x62\x72\x4c\x0d\x75\x81\xfc\xe5\x76\x1e\x9d\x8a\x0e\x6a\x1c\x3f\x92\x4f\xdd\x84\x93\xd1\x11\x56\x49\xc0\x5e\xb6\x01", 76, out, ctx, algo); bResult = bResult && memcmp(out, "\x40\x86\x5a\xa8\x87\x41\xec\x1d\xcc\xbd\x2b\xc6\xff\x36\xb9\x4d\x54\x71\x58\xdb\x94\x69\x8e\x3c\xa0\x3d\xe4\x81\x9a\x65\x9f\xef", 32) == 0; } - else if(algo == cryptonight_gpu) + else if(algo == POW(cryptonight_gpu)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_gpu); - hashf("", 0, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf("", 0, out, ctx, algo); bResult = bResult && memcmp(out, "\x55\x5e\x0a\xee\x78\x79\x31\x6d\x7d\xef\xf7\x72\x97\x3c\xb9\x11\x8e\x38\x95\x70\x9d\xb2\x54\x7a\xc0\x72\xd5\xb9\x13\x10\x01\xd8", 32) == 0; - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_gpu); - hashf("", 0, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); + hashf("", 0, out, ctx, algo); bResult = bResult && memcmp(out, "\x55\x5e\x0a\xee\x78\x79\x31\x6d\x7d\xef\xf7\x72\x97\x3c\xb9\x11\x8e\x38\x95\x70\x9d\xb2\x54\x7a\xc0\x72\xd5\xb9\x13\x10\x01\xd8", 32) == 0; } - else if (algo == cryptonight_turtle) + else if (algo == POW(cryptonight_turtle)) { - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_turtle); - hashf("This is a test This is a test This is a test", 44, out, ctx); + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\x30\x5f\x66\xfe\xbb\xf3\x60\x0e\xda\xbb\x60\xf7\xf1\xc9\xb9\x0a\x3a\xe8\x5a\x31\xd4\x76\xca\x38\x1d\x56\x18\xa6\xc6\x27\x60\xd7", 32) == 0; - - hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_turtle); - hashf("This is a test This is a test This is a test", 44, out, ctx); + + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\x30\x5f\x66\xfe\xbb\xf3\x60\x0e\xda\xbb\x60\xf7\xf1\xc9\xb9\x0a\x3a\xe8\x5a\x31\xd4\x76\xca\x38\x1d\x56\x18\xa6\xc6\x27\x60\xd7", 32) == 0; } + else + printer::inst()->print_msg(L0, + "Cryptonight hash self-test NOT defined for POW %s", algo.Name().c_str()); if(!bResult) printer::inst()->print_msg(L0, @@ -504,7 +501,7 @@ static std::string getAsmName(const uint32_t num_hashes) } template<size_t N> -minethd::cn_hash_fun minethd::func_multi_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo, const std::string& asm_version_str) +minethd::cn_hash_fun minethd::func_multi_selector(bool bHaveAes, bool bNoPrefetch, const xmrstak_algo& algo, const std::string& asm_version_str) { static_assert(N >= 1, "number of threads must be >= 1" ); @@ -513,7 +510,7 @@ minethd::cn_hash_fun minethd::func_multi_selector(bool bHaveAes, bool bNoPrefetc // function as a two digit binary uint8_t algv; - switch(algo) + switch(algo.Id()) { case cryptonight: algv = 2; @@ -622,7 +619,7 @@ minethd::cn_hash_fun minethd::func_multi_selector(bool bHaveAes, bool bNoPrefetc Cryptonight_hash<N>::template hash<cryptonight_superfast, true, false>, Cryptonight_hash<N>::template hash<cryptonight_superfast, false, true>, Cryptonight_hash<N>::template hash<cryptonight_superfast, true, true>, - + Cryptonight_hash_gpu::template hash<cryptonight_gpu, false, false>, Cryptonight_hash_gpu::template hash<cryptonight_gpu, true, false>, Cryptonight_hash_gpu::template hash<cryptonight_gpu, false, true>, @@ -642,7 +639,7 @@ minethd::cn_hash_fun minethd::func_multi_selector(bool bHaveAes, bool bNoPrefetc // check for asm optimized version for cryptonight_v8 - if(N <= 2 && algo == cryptonight_monero_v8 && bHaveAes) + if(N <= 2 && algo == cryptonight_monero_v8 && bHaveAes && algo.Mem() == CN_MEMORY && algo.Iter() == CN_ITER) { std::string selected_asm = asm_version_str; if(selected_asm == "auto") @@ -671,39 +668,10 @@ minethd::cn_hash_fun minethd::func_multi_selector(bool bHaveAes, bool bNoPrefetc } } - if (N <= 2 && (algo == cryptonight_turtle) && bHaveAes) - { - std::string selected_asm = asm_version_str; - if (selected_asm == "auto") - selected_asm = cpu::getAsmName(N); - - if (selected_asm != "off") - { - if (selected_asm == "intel_avx" && asm_version_str != "auto") - { - // Intel Ivy Bridge (Xeon v2, Core i7/i5/i3 3xxx, Pentium G2xxx, Celeron G1xxx) - if (N == 1) - selected_function = Cryptonight_hash_asm<1u, 0u>::template hash<cryptonight_turtle>; - else if (N == 2) - selected_function = Cryptonight_hash_asm<2u, 0u>::template hash<cryptonight_turtle>; - } - // supports only 1 thread per hash - if (N == 1 && selected_asm == "amd_avx") - { - // AMD Ryzen (1xxx and 2xxx series) - selected_function = Cryptonight_hash_asm<1u, 1u>::template hash<cryptonight_turtle>; - } - if (asm_version_str == "auto" && (selected_asm != "intel_avx" || selected_asm != "amd_avx")) - printer::inst()->print_msg(L3, "Switch to assembler version for '%s' cpu's", selected_asm.c_str()); - else if (selected_asm != "intel_avx" && selected_asm != "amd_avx") // unknown asm type - printer::inst()->print_msg(L1, "Assembler '%s' unknown, fallback to non asm version of cryptonight_v8", selected_asm.c_str()); - } - } - return selected_function; } -minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo) +minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, const xmrstak_algo& algo) { return func_multi_selector<1>(bHaveAes, bNoPrefetch, algo); } @@ -853,7 +821,7 @@ void minethd::multiway_work_main() for (size_t i = 0; i < N; i++) *piNonce[i] = iNonce++; - hash_fun_multi(bWorkBlob, oWork.iWorkSize, bHashOut, ctx); + hash_fun_multi(bWorkBlob, oWork.iWorkSize, bHashOut, ctx, miner_algo); for (size_t i = 0; i < N; i++) { diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp index eb77749..4131526 100644 --- a/xmrstak/backend/cpu/minethd.hpp +++ b/xmrstak/backend/cpu/minethd.hpp @@ -22,9 +22,9 @@ public: static std::vector<iBackend*> thread_starter(uint32_t threadOffset, miner_work& pWork); static bool self_test(); - typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx**); + typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx**, const xmrstak_algo&); - static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo); + static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch, const xmrstak_algo& algo); static bool thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id); static cryptonight_ctx* minethd_alloc_ctx(); @@ -32,7 +32,7 @@ public: private: template<size_t N> - static cn_hash_fun func_multi_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo, const std::string& asm_version_str = "off"); + static cn_hash_fun func_multi_selector(bool bHaveAes, bool bNoPrefetch, const xmrstak_algo& algo, const std::string& asm_version_str = "off"); minethd(miner_work& pWork, size_t iNo, int iMultiway, bool no_prefetch, int64_t affinity, const std::string& asm_version); diff --git a/xmrstak/backend/cryptonight.hpp b/xmrstak/backend/cryptonight.hpp index 4ac586d..9779608 100644 --- a/xmrstak/backend/cryptonight.hpp +++ b/xmrstak/backend/cryptonight.hpp @@ -3,8 +3,11 @@ #include <inttypes.h> #include <type_traits> #include <string> +#include <array> -enum xmrstak_algo +constexpr size_t start_derived_algo_id = 1000; + +enum xmrstak_algo_id { invalid_algo = 0, cryptonight = 1, @@ -22,307 +25,177 @@ enum xmrstak_algo cryptonight_gpu = 13, cryptonight_turtle = 14 // please add the algorithm name to get_algo_name() + + // e.g for derived algorithms cryptonight_derived = start_derived_algo_id }; /** get name of the algorithm * * @param algo mining algorithm */ -inline std::string get_algo_name(xmrstak_algo algo) +inline std::string get_algo_name(xmrstak_algo_id algo_id) { - std::string algo_name; - switch(algo) - { - case invalid_algo: - algo_name = "invalid_algo"; - break; - case cryptonight: - algo_name = "cryptonight"; - break; - case cryptonight_lite: - algo_name = "cryptonight_lite"; - break; - case cryptonight_monero: - algo_name = "cryptonight_v7"; - break; - case cryptonight_monero_v8: - algo_name = "cryptonight_v8"; - break; - case cryptonight_aeon: - algo_name = "cryptonight_lite_v7"; - break; - case cryptonight_stellite: - algo_name = "cryptonight_v7_stellite"; - break; - case cryptonight_ipbc: - algo_name = "cryptonight_lite_v7_xor"; - break; - case cryptonight_heavy: - algo_name = "cryptonight_heavy"; - break; - case cryptonight_haven: - algo_name = "cryptonight_haven"; - break; - case cryptonight_masari: - algo_name = "cryptonight_masari"; - break; - case cryptonight_superfast: - algo_name = "cryptonight_superfast"; - break; - case cryptonight_gpu: - algo_name = "cryptonight_gpu"; - break; - case cryptonight_turtle: - algo_name = "cryptonight_turtle"; - break; - default: - algo_name = "unknown"; - break; - } - - return algo_name; + static std::array<std::string, 15> base_algo_names = + {{ + "invalid_algo", + "cryptonight", + "cryptonight_lite", + "cryptonight_v7", + "cryptonight_heavy", + "cryptonight_lite_v7", + "cryptonight_lite_v7_xor", + "cryptonight_v7_stellite", + "cryptonight_masari", + "cryptonight_haven", + "cryptonight_bittube2", + "cryptonight_v8", + "cryptonight_superfast", + "cryptonight_gpu", + "cryptonight_turtle" + }}; + + static std::array<std::string, 0> derived_algo_names = + {{ + // cryptonight_derived + }}; + + + if(algo_id < start_derived_algo_id) + return base_algo_names[algo_id]; + else + return derived_algo_names[algo_id - start_derived_algo_id]; } -// define aeon settings -constexpr size_t CRYPTONIGHT_LITE_MEMORY = 1 * 1024 * 1024; -constexpr uint32_t CRYPTONIGHT_LITE_MASK = 0xFFFF0; -constexpr uint32_t CRYPTONIGHT_LITE_ITER = 0x40000; - -constexpr size_t CRYPTONIGHT_MEMORY = 2 * 1024 * 1024; -constexpr uint32_t CRYPTONIGHT_MASK = 0x1FFFF0; -constexpr uint32_t CRYPTONIGHT_ITER = 0x80000; - -constexpr size_t CRYPTONIGHT_HEAVY_MEMORY = 4 * 1024 * 1024; -constexpr uint32_t CRYPTONIGHT_HEAVY_MASK = 0x3FFFF0; -constexpr uint32_t CRYPTONIGHT_HEAVY_ITER = 0x40000; - -constexpr uint32_t CRYPTONIGHT_GPU_MASK = 0x1FFFC0; -constexpr uint32_t CRYPTONIGHT_GPU_ITER = 0xC000; - -constexpr uint32_t CRYPTONIGHT_MASARI_ITER = 0x40000; - -constexpr uint32_t CRYPTONIGHT_SUPERFAST_ITER = 0x20000; - -constexpr size_t CRYPTONIGHT_TURTLE_MEMORY = 256 * 1024; -constexpr uint32_t CRYPTONIGHT_TURTLE_MASK = 0x1FFF0; -constexpr uint32_t CRYPTONIGHT_TURTLE_ITER = 0x10000; - -template<xmrstak_algo ALGO> -inline constexpr size_t cn_select_memory() { return 0; } - -template<> -inline constexpr size_t cn_select_memory<cryptonight>() { return CRYPTONIGHT_MEMORY; } - -template<> -inline constexpr size_t cn_select_memory<cryptonight_lite>() { return CRYPTONIGHT_LITE_MEMORY; } - -template<> -inline constexpr size_t cn_select_memory<cryptonight_monero>() { return CRYPTONIGHT_MEMORY; } - -template<> -inline constexpr size_t cn_select_memory<cryptonight_monero_v8>() { return CRYPTONIGHT_MEMORY; } - -template<> -inline constexpr size_t cn_select_memory<cryptonight_heavy>() { return CRYPTONIGHT_HEAVY_MEMORY; } - -template<> -inline constexpr size_t cn_select_memory<cryptonight_aeon>() { return CRYPTONIGHT_LITE_MEMORY; } - -template<> -inline constexpr size_t cn_select_memory<cryptonight_ipbc>() { return CRYPTONIGHT_LITE_MEMORY; } - -template<> -inline constexpr size_t cn_select_memory<cryptonight_stellite>() { return CRYPTONIGHT_MEMORY; } - -template<> -inline constexpr size_t cn_select_memory<cryptonight_masari>() { return CRYPTONIGHT_MEMORY; } - -template<> -inline constexpr size_t cn_select_memory<cryptonight_haven>() { return CRYPTONIGHT_HEAVY_MEMORY; } - -template<> -inline constexpr size_t cn_select_memory<cryptonight_bittube2>() { return CRYPTONIGHT_HEAVY_MEMORY; } - -template<> -inline constexpr size_t cn_select_memory<cryptonight_superfast>() { return CRYPTONIGHT_MEMORY; } - -template<> -inline constexpr size_t cn_select_memory<cryptonight_gpu>() { return CRYPTONIGHT_MEMORY; } - -template<> -inline constexpr size_t cn_select_memory<cryptonight_turtle>() { return CRYPTONIGHT_TURTLE_MEMORY; } - -inline size_t cn_select_memory(xmrstak_algo algo) +struct xmrstak_algo { - switch(algo) + xmrstak_algo(xmrstak_algo_id name_id) : algo_name(name_id), base_algo(name_id) { - case cryptonight_stellite: - case cryptonight_monero: - case cryptonight_monero_v8: - case cryptonight_masari: - case cryptonight: - case cryptonight_superfast: - case cryptonight_gpu: - return CRYPTONIGHT_MEMORY; - case cryptonight_ipbc: - case cryptonight_aeon: - case cryptonight_lite: - return CRYPTONIGHT_LITE_MEMORY; - case cryptonight_bittube2: - case cryptonight_haven: - case cryptonight_heavy: - return CRYPTONIGHT_HEAVY_MEMORY; - case cryptonight_turtle: - return CRYPTONIGHT_TURTLE_MEMORY; - default: - return 0; } -} - -template<xmrstak_algo ALGO> -inline constexpr uint32_t cn_select_mask() { return 0; } - -template<> -inline constexpr uint32_t cn_select_mask<cryptonight>() { return CRYPTONIGHT_MASK; } - -template<> -inline constexpr uint32_t cn_select_mask<cryptonight_lite>() { return CRYPTONIGHT_LITE_MASK; } - -template<> -inline constexpr uint32_t cn_select_mask<cryptonight_monero>() { return CRYPTONIGHT_MASK; } - -template<> -inline constexpr uint32_t cn_select_mask<cryptonight_monero_v8>() { return CRYPTONIGHT_MASK; } - -template<> -inline constexpr uint32_t cn_select_mask<cryptonight_heavy>() { return CRYPTONIGHT_HEAVY_MASK; } - -template<> -inline constexpr uint32_t cn_select_mask<cryptonight_aeon>() { return CRYPTONIGHT_LITE_MASK; } - -template<> -inline constexpr uint32_t cn_select_mask<cryptonight_ipbc>() { return CRYPTONIGHT_LITE_MASK; } - -template<> -inline constexpr uint32_t cn_select_mask<cryptonight_stellite>() { return CRYPTONIGHT_MASK; } - -template<> -inline constexpr uint32_t cn_select_mask<cryptonight_masari>() { return CRYPTONIGHT_MASK; } - -template<> -inline constexpr uint32_t cn_select_mask<cryptonight_haven>() { return CRYPTONIGHT_HEAVY_MASK; } - -template<> -inline constexpr uint32_t cn_select_mask<cryptonight_bittube2>() { return CRYPTONIGHT_HEAVY_MASK; } - -template<> -inline constexpr uint32_t cn_select_mask<cryptonight_superfast>() { return CRYPTONIGHT_MASK; } - -template<> -inline constexpr uint32_t cn_select_mask<cryptonight_gpu>() { return CRYPTONIGHT_GPU_MASK; } - -template<> -inline constexpr uint32_t cn_select_mask<cryptonight_turtle>() { return CRYPTONIGHT_TURTLE_MASK; } - -inline size_t cn_select_mask(xmrstak_algo algo) -{ - switch(algo) + xmrstak_algo(xmrstak_algo_id name_id, xmrstak_algo_id algorithm) : algo_name(name_id), base_algo(algorithm) + { + } + xmrstak_algo(xmrstak_algo_id name_id, xmrstak_algo_id algorithm, uint32_t iteration) : algo_name(name_id), base_algo(algorithm), iter(iteration) + { + } + xmrstak_algo(xmrstak_algo_id name_id, xmrstak_algo_id algorithm, uint32_t iteration, size_t memory) : algo_name(name_id), base_algo(algorithm), iter(iteration), mem(memory) + { + } + xmrstak_algo(xmrstak_algo_id name_id, xmrstak_algo_id algorithm, uint32_t iteration, size_t memory, uint32_t mem_mask) : algo_name(name_id), base_algo(algorithm), iter(iteration), mem(memory), mask(mem_mask) { - case cryptonight_stellite: - case cryptonight_monero: - case cryptonight_monero_v8: - case cryptonight_masari: - case cryptonight: - case cryptonight_superfast: - return CRYPTONIGHT_MASK; - case cryptonight_ipbc: - case cryptonight_aeon: - case cryptonight_lite: - return CRYPTONIGHT_LITE_MASK; - case cryptonight_bittube2: - case cryptonight_haven: - case cryptonight_heavy: - return CRYPTONIGHT_HEAVY_MASK; - case cryptonight_gpu: - return CRYPTONIGHT_GPU_MASK; - case cryptonight_turtle: - return CRYPTONIGHT_TURTLE_MASK; - default: - return 0; } -} - -template<xmrstak_algo ALGO> -inline constexpr uint32_t cn_select_iter() { return 0; } -template<> -inline constexpr uint32_t cn_select_iter<cryptonight>() { return CRYPTONIGHT_ITER; } + /** check if the algorithm is equal to another algorithm + * + * we do not check the member algo_name because this is only an alias name + */ + bool operator==(const xmrstak_algo& other) const + { + return other.Id() == Id() && other.Mem() == Mem() && other.Iter() == Iter() && other.Mask() == Mask(); + } -template<> -inline constexpr uint32_t cn_select_iter<cryptonight_lite>() { return CRYPTONIGHT_LITE_ITER; } + bool operator==(const xmrstak_algo_id& id) const + { + return base_algo == id; + } -template<> -inline constexpr uint32_t cn_select_iter<cryptonight_monero>() { return CRYPTONIGHT_ITER; } + operator xmrstak_algo_id() const + { + return base_algo; + } -template<> -inline constexpr uint32_t cn_select_iter<cryptonight_monero_v8>() { return CRYPTONIGHT_ITER; } + xmrstak_algo_id Id() const + { + return base_algo; + } -template<> -inline constexpr uint32_t cn_select_iter<cryptonight_heavy>() { return CRYPTONIGHT_HEAVY_ITER; } + size_t Mem() const + { + if(base_algo == invalid_algo) + return 0; + else + return mem; + } -template<> -inline constexpr uint32_t cn_select_iter<cryptonight_aeon>() { return CRYPTONIGHT_LITE_ITER; } + uint32_t Iter() const + { + return iter; + } -template<> -inline constexpr uint32_t cn_select_iter<cryptonight_ipbc>() { return CRYPTONIGHT_LITE_ITER; } + /** Name of the algorithm + * + * This name is only an alias for the native implemented base algorithm. + */ + std::string Name() const + { + return get_algo_name(algo_name); + } -template<> -inline constexpr uint32_t cn_select_iter<cryptonight_stellite>() { return CRYPTONIGHT_ITER; } + /** Name of the parent algorithm + * + * This is the real algorithm which is implemented in all POW functions. + */ + std::string BaseName() const + { + return get_algo_name(base_algo); + } -template<> -inline constexpr uint32_t cn_select_iter<cryptonight_masari>() { return CRYPTONIGHT_MASARI_ITER; } + uint32_t Mask() const + { + // default is a 16 byte aligne mask + if(mask == 0) + return ((mem - 1u) / 16) * 16; + else + return mask; + } -template<> -inline constexpr uint32_t cn_select_iter<cryptonight_haven>() { return CRYPTONIGHT_HEAVY_ITER; } + xmrstak_algo_id algo_name = invalid_algo; + xmrstak_algo_id base_algo = invalid_algo; + uint32_t iter = 0u; + size_t mem = 0u; + uint32_t mask = 0u; +}; -template<> -inline constexpr uint32_t cn_select_iter<cryptonight_bittube2>() { return CRYPTONIGHT_HEAVY_ITER; } +// default cryptonight +constexpr size_t CN_MEMORY = 2 * 1024 * 1024; +constexpr uint32_t CN_ITER = 0x80000; -template<> -inline constexpr uint32_t cn_select_iter<cryptonight_superfast>() { return CRYPTONIGHT_SUPERFAST_ITER; } +// crptonight gpu +constexpr uint32_t CN_GPU_MASK = 0x1FFFC0; +constexpr uint32_t CN_GPU_ITER = 0xC000; -template<> -inline constexpr uint32_t cn_select_iter<cryptonight_gpu>() { return CRYPTONIGHT_GPU_ITER; } +// cryptonight turtle (the mask is not using the full 256kib scratchpad) +constexpr uint32_t CN_TURTLE_MASK = 0x1FFF0; -template<> -inline constexpr uint32_t cn_select_iter<cryptonight_turtle>() { return CRYPTONIGHT_TURTLE_ITER; } -inline size_t cn_select_iter(xmrstak_algo algo) +inline xmrstak_algo POW(xmrstak_algo_id algo_id) { - switch(algo) - { - case cryptonight_stellite: - case cryptonight_monero: - case cryptonight_monero_v8: - case cryptonight: - return CRYPTONIGHT_ITER; - case cryptonight_ipbc: - case cryptonight_aeon: - case cryptonight_lite: - return CRYPTONIGHT_LITE_ITER; - case cryptonight_bittube2: - case cryptonight_haven: - case cryptonight_heavy: - return CRYPTONIGHT_HEAVY_ITER; - case cryptonight_masari: - return CRYPTONIGHT_MASARI_ITER; - case cryptonight_superfast: - return CRYPTONIGHT_SUPERFAST_ITER; - case cryptonight_gpu: - return CRYPTONIGHT_GPU_ITER; - case cryptonight_turtle: - return CRYPTONIGHT_TURTLE_ITER; - default: - return 0; - } + static std::array<xmrstak_algo, 15> pow = {{ + {invalid_algo, invalid_algo}, + {cryptonight, cryptonight, CN_ITER, CN_MEMORY}, + {cryptonight_lite, cryptonight_lite, CN_ITER/2, CN_MEMORY/2}, + {cryptonight_monero, cryptonight_monero, CN_ITER, CN_MEMORY}, + {cryptonight_heavy, cryptonight_heavy, CN_ITER/2, CN_MEMORY*2}, + {cryptonight_aeon, cryptonight_aeon, CN_ITER/2, CN_MEMORY/2}, + {cryptonight_ipbc, cryptonight_ipbc, CN_ITER/2, CN_MEMORY/2}, // equal to cryptonight_aeon with a small tweak in the miner code + {cryptonight_stellite, cryptonight_stellite, CN_ITER, CN_MEMORY}, //equal to cryptonight_monero but with one tiny change + {cryptonight_masari, cryptonight_masari, CN_ITER/2, CN_MEMORY}, //equal to cryptonight_monero but with less iterations, used by masari + {cryptonight_haven, cryptonight_haven, CN_ITER/2, CN_MEMORY*2}, // equal to cryptonight_heavy with a small tweak + {cryptonight_bittube2, cryptonight_bittube2, CN_ITER/2, CN_MEMORY*2}, // derived from cryptonight_heavy with own aes-round implementation and minor other tweaks + {cryptonight_monero_v8, cryptonight_monero_v8, CN_ITER, CN_MEMORY}, + {cryptonight_superfast, cryptonight_superfast, CN_ITER/4, CN_MEMORY}, + {cryptonight_gpu, cryptonight_gpu, CN_GPU_ITER, CN_MEMORY, CN_GPU_MASK}, + {cryptonight_turtle, cryptonight_turtle, CN_ITER/8, CN_MEMORY/8, CN_TURTLE_MASK} + }}; + + static std::array<xmrstak_algo, 1> derived_pow = + {{ + // cryptonight_turtle is currently only a placeholder that MSVC is not crying + {cryptonight_turtle, cryptonight_turtle, CN_ITER/8, CN_MEMORY/8, CN_TURTLE_MASK} + // {cryptonight_derived} + }}; + + if(algo_id < start_derived_algo_id) + return pow[algo_id]; + else + return derived_pow[algo_id - start_derived_algo_id]; } diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp index 6460628..07ed4d3 100644 --- a/xmrstak/backend/nvidia/minethd.cpp +++ b/xmrstak/backend/nvidia/minethd.cpp @@ -307,7 +307,7 @@ void minethd::work_main() *(uint32_t*)(bWorkBlob + 39) = foundNonce[i]; - hash_fun(bWorkBlob, oWork.iWorkSize, bResult, &cpu_ctx); + hash_fun(bWorkBlob, oWork.iWorkSize, bResult, &cpu_ctx, miner_algo); if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget) executor::inst()->push_event(ex_event(job_result(oWork.sJobID, foundNonce[i], bResult, iThreadNo, miner_algo), oWork.iPoolId)); else diff --git a/xmrstak/backend/nvidia/minethd.hpp b/xmrstak/backend/nvidia/minethd.hpp index 3893568..3863c93 100644 --- a/xmrstak/backend/nvidia/minethd.hpp +++ b/xmrstak/backend/nvidia/minethd.hpp @@ -28,7 +28,7 @@ public: static bool self_test(); private: - typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx**); + typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx**, const xmrstak_algo&); minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg); void start_mining(); diff --git a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp index 8fda8d4..45ffef8 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp +++ b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp @@ -46,8 +46,8 @@ int cuda_get_devicecount( int* deviceCount); int cuda_get_deviceinfo(nvid_ctx *ctx); int cryptonight_extra_cpu_init(nvid_ctx *ctx); void cryptonight_extra_cpu_set_data( nvid_ctx* ctx, const void *data, uint32_t len); -void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce, xmrstak_algo miner_algo); -void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce, uint64_t target, uint32_t* rescount, uint32_t *resnonce,xmrstak_algo miner_algo); +void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce, const xmrstak_algo& miner_algo); +void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce, uint64_t target, uint32_t* rescount, uint32_t *resnonce, const xmrstak_algo& miner_algo); } -void cryptonight_core_cpu_hash(nvid_ctx* ctx, xmrstak_algo miner_algo, uint32_t startNonce); +void cryptonight_core_cpu_hash(nvid_ctx* ctx, const xmrstak_algo& miner_algo, uint32_t startNonce); diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu index 2acf1a3..31f76d2 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu @@ -128,8 +128,9 @@ __device__ __forceinline__ uint32_t rotate16( const uint32_t n ) return (n >> 16u) | (n << 16u); } -template<size_t ITERATIONS, uint32_t MEMORY> -__global__ void cryptonight_core_gpu_phase1( int threads, int bfactor, int partidx, uint32_t * __restrict__ long_state, uint32_t * __restrict__ ctx_state2, uint32_t * __restrict__ ctx_key1 ) +__global__ void cryptonight_core_gpu_phase1( + const uint32_t ITERATIONS, const size_t MEMORY, + int threads, int bfactor, int partidx, uint32_t * __restrict__ long_state, uint32_t * __restrict__ ctx_state2, uint32_t * __restrict__ ctx_key1 ) { __shared__ uint32_t sharedMemory[1024]; @@ -267,11 +268,13 @@ struct u64 : public uint2 * @tparam MEM_MODE if `0` than 64bit memory transfers per thread will be used to store/load data within shared memory * else if `1` 256bit operations will be used */ -template<size_t ITERATIONS, uint32_t MEMORY, uint32_t MASK, xmrstak_algo ALGO, uint32_t MEM_MODE> +template<xmrstak_algo_id ALGO, uint32_t MEM_MODE> #ifdef XMR_STAK_THREADS __launch_bounds__( XMR_STAK_THREADS * 2 ) #endif -__global__ void cryptonight_core_gpu_phase2_double( int threads, int bfactor, int partidx, uint32_t * d_long_state, uint32_t * d_ctx_a, uint32_t * d_ctx_b, uint32_t * d_ctx_state, +__global__ void cryptonight_core_gpu_phase2_double( + const uint32_t ITERATIONS, const size_t MEMORY, const uint32_t MASK, + int threads, int bfactor, int partidx, uint32_t * d_long_state, uint32_t * d_ctx_a, uint32_t * d_ctx_b, uint32_t * d_ctx_state, uint32_t startNonce, uint32_t * __restrict__ d_input ) { __shared__ uint32_t sharedMemory[512]; @@ -482,11 +485,13 @@ __global__ void cryptonight_core_gpu_phase2_double( int threads, int bfactor, in } } -template<size_t ITERATIONS, uint32_t MEMORY, uint32_t MASK, xmrstak_algo ALGO> +template<xmrstak_algo_id ALGO> #ifdef XMR_STAK_THREADS __launch_bounds__( XMR_STAK_THREADS * 4 ) #endif -__global__ void cryptonight_core_gpu_phase2_quad( int threads, int bfactor, int partidx, uint32_t * d_long_state, uint32_t * d_ctx_a, uint32_t * d_ctx_b, uint32_t * d_ctx_state, +__global__ void cryptonight_core_gpu_phase2_quad( + const uint32_t ITERATIONS, const size_t MEMORY, const uint32_t MASK, + int threads, int bfactor, int partidx, uint32_t * d_long_state, uint32_t * d_ctx_a, uint32_t * d_ctx_b, uint32_t * d_ctx_state, uint32_t startNonce, uint32_t * __restrict__ d_input ) { __shared__ uint32_t sharedMemory[1024]; @@ -685,8 +690,10 @@ __global__ void cryptonight_core_gpu_phase2_quad( int threads, int bfactor, int } } -template<size_t ITERATIONS, uint32_t MEMORY, xmrstak_algo ALGO> -__global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int partidx, const uint32_t * __restrict__ long_state, uint32_t * __restrict__ d_ctx_state, uint32_t * __restrict__ d_ctx_key2 ) +template<xmrstak_algo_id ALGO> +__global__ void cryptonight_core_gpu_phase3( + const uint32_t ITERATIONS, const size_t MEMORY, + int threads, int bfactor, int partidx, const uint32_t * __restrict__ long_state, uint32_t * __restrict__ d_ctx_state, uint32_t * __restrict__ d_ctx_key2 ) { __shared__ uint32_t sharedMemory[1024]; @@ -737,9 +744,13 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti MEMCPY8( d_ctx_state + thread * 50 + sub + 16, text, 2 ); } -template<size_t ITERATIONS, uint32_t MASK, uint32_t MEMORY, xmrstak_algo ALGO, uint32_t MEM_MODE> -void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce) +template<xmrstak_algo_id ALGO, uint32_t MEM_MODE> +void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce, const xmrstak_algo& algo) { + const uint32_t MASK = algo.Mask(); + const uint32_t ITERATIONS = algo.Iter(); + const size_t MEM = algo.Mem()/4; + dim3 grid( ctx->device_blocks ); dim3 block( ctx->device_threads ); dim3 block2( ctx->device_threads << 1 ); @@ -761,7 +772,10 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce) for ( int i = 0; i < partcountOneThree; i++ ) { - CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase1<ITERATIONS,MEMORY><<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads, + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase1<<< grid, block8 >>>( + ITERATIONS, + MEM, + ctx->device_blocks*ctx->device_threads, bfactorOneThree, i, ctx->d_long_state, (ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2 || ALGO == cryptonight_superfast ? ctx->d_ctx_state2 : ctx->d_ctx_state), @@ -779,13 +793,16 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce) CUDA_CHECK_MSG_KERNEL( ctx->device_id, "\n**suggestion: Try to increase the value of the attribute 'bfactor' or \nreduce 'threads' in the NVIDIA config file.**", - cryptonight_core_gpu_phase2_double<ITERATIONS,MEMORY,MASK,ALGO, MEM_MODE><<< + cryptonight_core_gpu_phase2_double<ALGO, MEM_MODE><<< grid, block2, sizeof(uint64_t) * block2.x * 8 + // shuffle memory for fermi gpus block2.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 ) >>>( + ITERATIONS, + MEM, + MASK, ctx->device_blocks*ctx->device_threads, ctx->device_bfactor, i, @@ -803,11 +820,14 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce) CUDA_CHECK_MSG_KERNEL( ctx->device_id, "\n**suggestion: Try to increase the value of the attribute 'bfactor' or \nreduce 'threads' in the NVIDIA config file.**", - cryptonight_core_gpu_phase2_quad<ITERATIONS,MEMORY,MASK,ALGO><<< + cryptonight_core_gpu_phase2_quad<ALGO><<< grid, block4, block4.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 ) >>>( + ITERATIONS, + MEM, + MASK, ctx->device_blocks*ctx->device_threads, ctx->device_bfactor, i, @@ -834,20 +854,27 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce) for ( int i = 0; i < roundsPhase3; i++ ) { - CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ITERATIONS,MEMORY, ALGO><<< + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ALGO><<< grid, block8, block8.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 ) - >>>( ctx->device_blocks*ctx->device_threads, + >>>( + ITERATIONS, + MEM, + ctx->device_blocks*ctx->device_threads, bfactorOneThree, i, ctx->d_long_state, ctx->d_ctx_state, ctx->d_ctx_key2 )); } } -template<size_t ITERATIONS, uint32_t MASK, uint32_t MEMORY, xmrstak_algo ALGO, uint32_t MEM_MODE> -void cryptonight_core_gpu_hash_gpu(nvid_ctx* ctx, uint32_t nonce) +template<xmrstak_algo_id ALGO, uint32_t MEM_MODE> +void cryptonight_core_gpu_hash_gpu(nvid_ctx* ctx, uint32_t nonce, const xmrstak_algo& algo) { + const uint32_t MASK = algo.Mask(); + const uint32_t ITERATIONS = algo.Iter(); + const size_t MEM = algo.Mem(); + dim3 grid( ctx->device_blocks ); dim3 block( ctx->device_threads ); dim3 block2( ctx->device_threads << 1 ); @@ -858,7 +885,7 @@ void cryptonight_core_gpu_hash_gpu(nvid_ctx* ctx, uint32_t nonce) CUDA_CHECK_KERNEL( ctx->device_id, - xmrstak::nvidia::cn_explode_gpu<MEMORY><<<intensity,32>>>((int*)ctx->d_ctx_state, (int*)ctx->d_long_state) + xmrstak::nvidia::cn_explode_gpu<<<intensity,32>>>(MEM, (int*)ctx->d_ctx_state, (int*)ctx->d_long_state) ); int partcount = 1 << ctx->device_bfactor; @@ -867,9 +894,12 @@ void cryptonight_core_gpu_hash_gpu(nvid_ctx* ctx, uint32_t nonce) CUDA_CHECK_KERNEL( ctx->device_id, // 36 x 16byte x numThreads - xmrstak::nvidia::cryptonight_core_gpu_phase2_gpu<ITERATIONS, MEMORY> + xmrstak::nvidia::cryptonight_core_gpu_phase2_gpu <<<ctx->device_blocks, ctx->device_threads * 16, 36 * 16 * ctx->device_threads>>> ( + ITERATIONS, + MEM, + MASK, (int*)ctx->d_ctx_state, (int*)ctx->d_long_state, ctx->device_bfactor, @@ -901,71 +931,74 @@ void cryptonight_core_gpu_hash_gpu(nvid_ctx* ctx, uint32_t nonce) for ( int i = 0; i < roundsPhase3; i++ ) { - CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ITERATIONS,MEMORY/4, ALGO><<< + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ALGO><<< grid, block8, block8.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 ) - >>>( ctx->device_blocks*ctx->device_threads, + >>>( + ITERATIONS, + MEM/4, + ctx->device_blocks*ctx->device_threads, bfactorOneThree, i, ctx->d_long_state, ctx->d_ctx_state, ctx->d_ctx_key2 )); } } -void cryptonight_core_cpu_hash(nvid_ctx* ctx, xmrstak_algo miner_algo, uint32_t startNonce) +void cryptonight_core_cpu_hash(nvid_ctx* ctx, const xmrstak_algo& miner_algo, uint32_t startNonce) { - typedef void (*cuda_hash_fn)(nvid_ctx* ctx, uint32_t nonce); + typedef void (*cuda_hash_fn)(nvid_ctx* ctx, uint32_t nonce, const xmrstak_algo& algo); if(miner_algo == invalid_algo) return; static const cuda_hash_fn func_table[] = { - cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight, 0>, - cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight, 1>, + cryptonight_core_gpu_hash<cryptonight, 0>, + cryptonight_core_gpu_hash<cryptonight, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_lite, 0>, - cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_lite, 1>, + cryptonight_core_gpu_hash<cryptonight_lite, 0>, + cryptonight_core_gpu_hash<cryptonight_lite, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_monero, 0>, - cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_monero, 1>, + cryptonight_core_gpu_hash<cryptonight_monero, 0>, + cryptonight_core_gpu_hash<cryptonight_monero, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_heavy, 0>, - cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_heavy, 1>, + cryptonight_core_gpu_hash<cryptonight_heavy, 0>, + cryptonight_core_gpu_hash<cryptonight_heavy, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_aeon, 0>, - cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_aeon, 1>, + cryptonight_core_gpu_hash<cryptonight_aeon, 0>, + cryptonight_core_gpu_hash<cryptonight_aeon, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_ipbc, 0>, - cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_ipbc, 1>, + cryptonight_core_gpu_hash<cryptonight_ipbc, 0>, + cryptonight_core_gpu_hash<cryptonight_ipbc, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_stellite, 0>, - cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_stellite, 1>, + cryptonight_core_gpu_hash<cryptonight_stellite, 0>, + cryptonight_core_gpu_hash<cryptonight_stellite, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_MASARI_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_masari, 0>, - cryptonight_core_gpu_hash<CRYPTONIGHT_MASARI_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_masari, 1>, + cryptonight_core_gpu_hash<cryptonight_masari, 0>, + cryptonight_core_gpu_hash<cryptonight_masari, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_haven, 0>, - cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_haven, 1>, + cryptonight_core_gpu_hash<cryptonight_haven, 0>, + cryptonight_core_gpu_hash<cryptonight_haven, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_bittube2, 0>, - cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_bittube2, 1>, + cryptonight_core_gpu_hash<cryptonight_bittube2, 0>, + cryptonight_core_gpu_hash<cryptonight_bittube2, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_monero_v8, 0>, - cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_monero_v8, 1>, + cryptonight_core_gpu_hash<cryptonight_monero_v8, 0>, + cryptonight_core_gpu_hash<cryptonight_monero_v8, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_SUPERFAST_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_superfast, 0>, - cryptonight_core_gpu_hash<CRYPTONIGHT_SUPERFAST_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_superfast, 1>, + cryptonight_core_gpu_hash<cryptonight_superfast, 0>, + cryptonight_core_gpu_hash<cryptonight_superfast, 1>, - cryptonight_core_gpu_hash_gpu<CRYPTONIGHT_GPU_ITER, CRYPTONIGHT_GPU_MASK, CRYPTONIGHT_MEMORY, cryptonight_gpu, 0>, - cryptonight_core_gpu_hash_gpu<CRYPTONIGHT_GPU_ITER, CRYPTONIGHT_GPU_MASK, CRYPTONIGHT_MEMORY, cryptonight_gpu, 1>, + cryptonight_core_gpu_hash_gpu<cryptonight_gpu, 0>, + cryptonight_core_gpu_hash_gpu<cryptonight_gpu, 1>, - cryptonight_core_gpu_hash<CRYPTONIGHT_TURTLE_ITER, CRYPTONIGHT_TURTLE_MASK, CRYPTONIGHT_TURTLE_MEMORY/4, cryptonight_turtle, 0>, - cryptonight_core_gpu_hash<CRYPTONIGHT_TURTLE_ITER, CRYPTONIGHT_TURTLE_MASK, CRYPTONIGHT_TURTLE_MEMORY/4, cryptonight_turtle, 1> + cryptonight_core_gpu_hash<cryptonight_turtle, 0>, + cryptonight_core_gpu_hash<cryptonight_turtle, 1> }; std::bitset<1> digit; digit.set(0, ctx->memMode == 1); cuda_hash_fn selected_function = func_table[ ((miner_algo - 1u) << 1) | digit.to_ulong() ]; - selected_function(ctx, startNonce); + selected_function(ctx, startNonce, miner_algo); } diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_cryptonight_gpu.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_cryptonight_gpu.hpp index a26fc21..7acccb4 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_cryptonight_gpu.hpp +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_cryptonight_gpu.hpp @@ -275,8 +275,7 @@ __forceinline__ __device__ __m128i _mm_alignr_epi8(__m128i a, const uint32_t rot ); } -template<uint32_t MASK> -__device__ __m128i* scratchpad_ptr(uint32_t idx, uint32_t n, int *lpad) { return (__m128i*)((uint8_t*)lpad + (idx & MASK) + n * 16); } +__device__ __m128i* scratchpad_ptr(uint32_t idx, uint32_t n, int *lpad, const uint32_t MASK) { return (__m128i*)((uint8_t*)lpad + (idx & MASK) + n * 16); } __forceinline__ __device__ __m128 fma_break(__m128 x) @@ -412,10 +411,10 @@ __forceinline__ __device__ void sync() #endif } -template<size_t ITERATIONS, uint32_t MEMORY> -__global__ void cryptonight_core_gpu_phase2_gpu(int32_t *spad, int *lpad_in, int bfactor, int partidx, uint32_t * roundVs, uint32_t * roundS) +__global__ void cryptonight_core_gpu_phase2_gpu( + const uint32_t ITERATIONS, const size_t MEMORY, const uint32_t MASK, + int32_t *spad, int *lpad_in, int bfactor, int partidx, uint32_t * roundVs, uint32_t * roundS) { - constexpr uint32_t MASK = ((MEMORY-1) >> 6) << 6; const int batchsize = (ITERATIONS * 2) >> ( 1 + bfactor ); @@ -457,7 +456,7 @@ __global__ void cryptonight_core_gpu_phase2_gpu(int32_t *spad, int *lpad_in, int for(size_t i = 0; i < batchsize; i++) { sync(); - ((int*)smem)[tid] = ((int*)scratchpad_ptr<MASK>(s, b, lpad))[bb]; + ((int*)smem)[tid] = ((int*)scratchpad_ptr(s, b, lpad, MASK))[bb]; sync(); __m128 rc = vs; @@ -477,7 +476,7 @@ __global__ void cryptonight_core_gpu_phase2_gpu(int32_t *spad, int *lpad_in, int for(uint32_t dd = block + 4; dd < (b + 1) * 16; dd += 4) outXor ^= ((int*)smemOut)[dd]; - ((int*)scratchpad_ptr<MASK>(s, b, lpad))[bb] = outXor ^ ((int*)smem)[tid]; + ((int*)scratchpad_ptr(s, b, lpad, MASK))[bb] = outXor ^ ((int*)smem)[tid]; ((int*)smemOut)[tid] = outXor; float va_tmp1 = ((float*)smemVa)[block] + ((float*)smemVa)[block + 4]; @@ -539,8 +538,8 @@ __forceinline__ __device__ void generate_512(uint64_t idx, const uint64_t* in, u ((ulonglong2*)out)[i] = ((ulonglong2*)hash)[i]; } -template<size_t MEMORY> -__global__ void cn_explode_gpu(int32_t *spad_in, int *lpad_in) + +__global__ void cn_explode_gpu(const size_t MEMORY, int32_t *spad_in, int *lpad_in) { __shared__ uint64_t state[25]; diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index a37ecc8..edf87e3 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -93,7 +93,7 @@ __device__ __forceinline__ void mix_and_propagate( uint32_t* state ) (state + 4 * 7)[x] = (state + 4 * 7)[x] ^ tmp0[x]; } -template<xmrstak_algo ALGO> +template<xmrstak_algo_id ALGO> __global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restrict__ d_input, uint32_t len, uint32_t startNonce, uint32_t * __restrict__ d_ctx_state, uint32_t * __restrict__ d_ctx_state2, uint32_t * __restrict__ d_ctx_a, uint32_t * __restrict__ d_ctx_b, uint32_t * __restrict__ d_ctx_key1, uint32_t * __restrict__ d_ctx_key2 ) { int thread = ( blockDim.x * blockIdx.x + threadIdx.x ); @@ -162,7 +162,7 @@ __global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restric } } -template<xmrstak_algo ALGO> +template<xmrstak_algo_id ALGO> __global__ void cryptonight_extra_gpu_final( int threads, uint64_t target, uint32_t* __restrict__ d_res_count, uint32_t * __restrict__ d_res_nonce, uint32_t * __restrict__ d_ctx_state,uint32_t * __restrict__ d_ctx_key2 ) { const int thread = blockDim.x * blockIdx.x + threadIdx.x; @@ -292,7 +292,7 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) size_t hashMemSize = 0; for(const auto algo : neededAlgorithms) { - hashMemSize = std::max(hashMemSize, cn_select_memory(algo)); + hashMemSize = std::max(hashMemSize, algo.Mem()); } size_t wsize = ctx->device_blocks * ctx->device_threads; @@ -335,7 +335,7 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) return 1; } -extern "C" void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce, xmrstak_algo miner_algo) +extern "C" void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce, const xmrstak_algo& miner_algo) { int threadsperblock = 128; uint32_t wsize = ctx->device_blocks * ctx->device_threads; @@ -388,7 +388,7 @@ extern "C" void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce } } -extern "C" void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce, uint64_t target, uint32_t* rescount, uint32_t *resnonce,xmrstak_algo miner_algo) +extern "C" void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce, uint64_t target, uint32_t* rescount, uint32_t *resnonce, const xmrstak_algo& miner_algo) { int threadsperblock = 128; uint32_t wsize = ctx->device_blocks * ctx->device_threads; @@ -697,7 +697,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) size_t hashMemSize = 0; for(const auto algo : neededAlgorithms) { - hashMemSize = std::max(hashMemSize, cn_select_memory(algo)); + hashMemSize = std::max(hashMemSize, algo.Mem()); } #ifdef WIN32 @@ -770,7 +770,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) size_t blockOptimal = 8 * ctx->device_mpcount; if(gpuArch >= 70) blockOptimal = 5 * ctx->device_mpcount; - + if(blockOptimal * threads * hashMemSize < limitedMemory) { ctx->device_threads = threads; diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp index 40fb9d9..7539706 100644 --- a/xmrstak/cli/cli-miner.cpp +++ b/xmrstak/cli/cli-miner.cpp @@ -801,7 +801,7 @@ int main(int argc, char *argv[]) printer::inst()->print_str("This currency is a way for us to implement the ideas that we were unable to in\n"); printer::inst()->print_str("Monero. See https://github.com/fireice-uk/cryptonote-speedup-demo for details.\n"); printer::inst()->print_str("-------------------------------------------------------------------\n"); - printer::inst()->print_msg(L0, "Mining coin: %s", jconf::inst()->GetMiningCoin().c_str()); + printer::inst()->print_msg(L0, "Mining coin: %s", ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo().Name().c_str()); if(params::inst().benchmark_block_version >= 0) { diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp index a169105..b897a75 100644 --- a/xmrstak/jconf.cpp +++ b/xmrstak/jconf.cpp @@ -87,32 +87,33 @@ constexpr size_t iConfigCnt = (sizeof(oConfigValues)/sizeof(oConfigValues[0])); xmrstak::coin_selection coins[] = { // name, userpool, devpool, default_pool_suggestion - { "aeon7", {cryptonight_aeon, cryptonight_aeon, 0u}, {cryptonight_aeon, cryptonight_aeon, 0u}, "mine.aeon-pool.com:5555" }, - { "bbscoin", {cryptonight_aeon, cryptonight_aeon, 0u}, {cryptonight_aeon, cryptonight_aeon, 0u}, nullptr }, - { "bittube", {cryptonight_heavy, cryptonight_bittube2, 255u}, {cryptonight_heavy, cryptonight_heavy, 0u},"mining.bit.tube:13333"}, - { "cryptonight", {cryptonight_monero_v8, cryptonight, 255u}, {cryptonight_monero_v8, cryptonight_monero_v8, 0u}, nullptr }, - { "cryptonight_bittube2",{cryptonight_heavy, cryptonight_bittube2, 255u}, {cryptonight_heavy, cryptonight_heavy, 0u},nullptr}, - { "cryptonight_masari", {cryptonight_monero_v8, cryptonight_masari, 255u}, {cryptonight_monero_v8, cryptonight_monero_v8, 0u},nullptr }, - { "cryptonight_haven", {cryptonight_heavy, cryptonight_haven, 255u}, {cryptonight_heavy, cryptonight_heavy, 0u}, nullptr }, - { "cryptonight_heavy", {cryptonight_heavy, cryptonight_heavy, 0u}, {cryptonight_heavy, cryptonight_heavy, 0u}, nullptr }, - { "cryptonight_lite", {cryptonight_aeon, cryptonight_lite, 255u}, {cryptonight_aeon, cryptonight_aeon, 0u}, nullptr }, - { "cryptonight_lite_v7", {cryptonight_aeon, cryptonight_aeon, 0u}, {cryptonight_aeon, cryptonight_aeon, 0u}, nullptr }, - { "cryptonight_lite_v7_xor", {cryptonight_aeon, cryptonight_ipbc, 255u}, {cryptonight_aeon, cryptonight_aeon, 0u}, nullptr }, - { "cryptonight_superfast", {cryptonight_heavy, cryptonight_superfast, 255u},{cryptonight_heavy, cryptonight_superfast, 0u}, nullptr }, - { "cryptonight_turtle", {cryptonight_turtle, cryptonight_turtle, 0u}, {cryptonight_turtle, cryptonight_turtle, 0u}, nullptr }, - { "cryptonight_v7", {cryptonight_monero_v8, cryptonight_monero, 255u}, {cryptonight_monero_v8, cryptonight_monero_v8, 0u}, nullptr }, - { "cryptonight_v8", {cryptonight_monero_v8, cryptonight_monero_v8, 255u}, {cryptonight_monero_v8, cryptonight_monero_v8, 0u}, nullptr }, - { "cryptonight_v7_stellite", {cryptonight_monero_v8, cryptonight_stellite, 255u}, {cryptonight_monero_v8, cryptonight_monero_v8, 0u}, nullptr }, - { "cryptonight_gpu", {cryptonight_gpu, cryptonight_gpu, 255u}, {cryptonight_gpu, cryptonight_gpu, 0u}, nullptr }, - { "freehaven", {cryptonight_heavy, cryptonight_superfast, 255u}, {cryptonight_heavy, cryptonight_superfast, 0u}, nullptr }, - { "graft", {cryptonight_monero_v8, cryptonight_monero_v8, 0u}, {cryptonight_monero_v8, cryptonight_monero_v8, 0u}, nullptr }, - { "haven", {cryptonight_heavy, cryptonight_haven, 255u}, {cryptonight_heavy, cryptonight_heavy, 0u}, nullptr }, - { "lethean", {cryptonight_monero_v8, cryptonight_monero, 255u}, {cryptonight_monero_v8, cryptonight_monero_v8, 0u}, nullptr }, - { "monero", {cryptonight_monero_v8, cryptonight_monero_v8, 0u}, {cryptonight_monero_v8, cryptonight_monero_v8, 0u}, "pool.usxmrpool.com:3333" }, - { "qrl", {cryptonight_monero_v8, cryptonight_monero, 255u}, {cryptonight_monero_v8, cryptonight_monero_v8, 0u}, nullptr }, - { "ryo", {cryptonight_gpu, cryptonight_heavy, 6u}, {cryptonight_gpu, cryptonight_heavy, 6u}, nullptr }, - { "turtlecoin", {cryptonight_turtle, cryptonight_aeon, 5u}, {cryptonight_aeon, cryptonight_aeon, 0u}, nullptr }, - { "plenteum", {cryptonight_turtle, cryptonight_aeon, 5u}, {cryptonight_aeon, cryptonight_aeon, 0u}, nullptr } + { "aeon7", {POW(cryptonight_aeon)}, {POW(cryptonight_aeon)}, "mine.aeon-pool.com:5555" }, + { "bbscoin", {POW(cryptonight_aeon)}, {POW(cryptonight_aeon)}, nullptr }, + { "bittube", {POW(cryptonight_bittube2)}, {POW(cryptonight_heavy)}, "mining.bit.tube:13333" }, + { "cryptonight", {POW(cryptonight)}, {POW(cryptonight_monero_v8)}, nullptr }, + { "cryptonight_bittube2", {POW(cryptonight_bittube2)}, {POW(cryptonight_heavy)}, nullptr }, + { "cryptonight_masari", {POW(cryptonight_masari)}, {POW(cryptonight_monero_v8)}, nullptr }, + { "cryptonight_haven", {POW(cryptonight_haven)}, {POW(cryptonight_heavy)}, nullptr }, + { "cryptonight_heavy", {POW(cryptonight_heavy)}, {POW(cryptonight_heavy)}, nullptr }, + { "cryptonight_lite", {POW(cryptonight_lite)}, {POW(cryptonight_aeon)}, nullptr }, + { "cryptonight_lite_v7", {POW(cryptonight_aeon)}, {POW(cryptonight_aeon)}, nullptr }, + { "cryptonight_lite_v7_xor", {POW(cryptonight_ipbc)}, {POW(cryptonight_aeon)}, nullptr }, + { "cryptonight_superfast", {POW(cryptonight_superfast)}, {POW(cryptonight_monero_v8)}, nullptr }, + { "cryptonight_turtle", {POW(cryptonight_turtle)}, {POW(cryptonight_turtle)}, nullptr }, + { "cryptonight_v7", {POW(cryptonight_monero)}, {POW(cryptonight_monero_v8)}, nullptr }, + { "cryptonight_v8", {POW(cryptonight_monero_v8)}, {POW(cryptonight_monero_v8)}, nullptr }, + { "cryptonight_v7_stellite", {POW(cryptonight_stellite)}, {POW(cryptonight_monero_v8)}, nullptr }, + { "cryptonight_gpu", {POW(cryptonight_gpu)}, {POW(cryptonight_gpu)}, nullptr }, + { "freehaven", {POW(cryptonight_superfast)}, {POW(cryptonight_monero_v8)}, nullptr }, + { "graft", {POW(cryptonight_monero_v8)}, {POW(cryptonight_monero_v8)}, nullptr }, + { "haven", {POW(cryptonight_haven)}, {POW(cryptonight_heavy)}, nullptr }, + { "lethean", {POW(cryptonight_monero)}, {POW(cryptonight_monero_v8)}, nullptr }, + { "monero", {POW(cryptonight_monero_v8)}, {POW(cryptonight_monero_v8)}, "pool.usxmrpool.com:3333" }, + { "qrl", {POW(cryptonight_monero)}, {POW(cryptonight_monero_v8)}, nullptr }, + { "ryo", {POW(cryptonight_gpu), 6u, POW(cryptonight_heavy)}, {POW(cryptonight_gpu), 6u, POW(cryptonight_heavy)}, nullptr }, + { "turtlecoin", {POW(cryptonight_turtle), 6u,POW(cryptonight_aeon)}, {POW(cryptonight_aeon)}, nullptr }, + { "plenteum", {POW(cryptonight_turtle)}, {POW(cryptonight_turtle)}, nullptr } + }; constexpr size_t coin_algo_size = (sizeof(coins)/sizeof(coins[0])); diff --git a/xmrstak/misc/coinDescription.hpp b/xmrstak/misc/coinDescription.hpp index 26688ae..65dee14 100644 --- a/xmrstak/misc/coinDescription.hpp +++ b/xmrstak/misc/coinDescription.hpp @@ -11,13 +11,17 @@ namespace xmrstak { struct coinDescription { - xmrstak_algo algo = xmrstak_algo::invalid_algo; - xmrstak_algo algo_root = xmrstak_algo::invalid_algo; + xmrstak_algo algo = {xmrstak_algo_id::invalid_algo}; uint8_t fork_version = 0u; + xmrstak_algo algo_root = {xmrstak_algo_id::invalid_algo}; coinDescription() = default; - coinDescription(const xmrstak_algo in_algo, xmrstak_algo in_algo_root, const uint8_t in_fork_version) : + coinDescription( + const xmrstak_algo in_algo, + const uint8_t in_fork_version = 0, + xmrstak_algo in_algo_root = xmrstak_algo_id::invalid_algo + ) : algo(in_algo), algo_root(in_algo_root), fork_version(in_fork_version) {} diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp index c475c41..0dd7db1 100644 --- a/xmrstak/misc/executor.cpp +++ b/xmrstak/misc/executor.cpp @@ -567,31 +567,24 @@ void executor::ex_main() pools.emplace_front(0, "donate.xmr-stak.net:5511", "", "", "", 0.0, true, false, "", false); break; case cryptonight_monero_v8: - case cryptonight_monero: - case cryptonight_turtle: if(dev_tls) pools.emplace_front(0, "donate.xmr-stak.net:8800", "", "", "", 0.0, true, true, "", false); else pools.emplace_front(0, "donate.xmr-stak.net:5500", "", "", "", 0.0, true, false, "", false); break; - case cryptonight_ipbc: case cryptonight_aeon: - case cryptonight_lite: if(dev_tls) pools.emplace_front(0, "donate.xmr-stak.net:7777", "", "", "", 0.0, true, true, "", true); else pools.emplace_front(0, "donate.xmr-stak.net:4444", "", "", "", 0.0, true, false, "", true); break; - - case cryptonight: + default: + case cryptonight_lite: if(dev_tls) pools.emplace_front(0, "donate.xmr-stak.net:6666", "", "", "", 0.0, true, true, "", false); else pools.emplace_front(0, "donate.xmr-stak.net:3333", "", "", "", 0.0, true, false, "", false); break; - - default: - break; } ex_event ev; diff --git a/xmrstak/net/jpsock.cpp b/xmrstak/net/jpsock.cpp index ecd4d98..d5b0d7f 100644 --- a/xmrstak/net/jpsock.cpp +++ b/xmrstak/net/jpsock.cpp @@ -655,13 +655,17 @@ bool jpsock::cmd_login() return true; } -bool jpsock::cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bResult, const char* backend_name, uint64_t backend_hashcount, uint64_t total_hashcount, xmrstak_algo algo) +bool jpsock::cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bResult, const char* backend_name, uint64_t backend_hashcount, uint64_t total_hashcount, const xmrstak_algo& algo) { char cmd_buffer[1024]; char sNonce[9]; char sResult[65]; /*Extensions*/ char sAlgo[64] = {0}; + char sBaseAlgo[64] = {0}; + char sIterations[32] = {0}; + char sMemory[32] = {0}; + char sMemAlignBytes[32] = {0}; char sBackend[64] = {0}; char sHashcount[128] = {0}; @@ -673,7 +677,12 @@ bool jpsock::cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bRes if(ext_algo) { - snprintf(sAlgo, sizeof(sAlgo), ",\"algo\":\"%s\"", get_algo_name(algo).c_str()); + snprintf(sAlgo, sizeof(sAlgo), ",\"algo\":\"%s\"", algo.Name().c_str()); + // the real algorithm with three degrees of freedom + snprintf(sBaseAlgo, sizeof(sBaseAlgo), ",\"base_algo\":\"%s\"", algo.BaseName().c_str()); + snprintf(sIterations, sizeof(sIterations), ",\"iterations\":\"0x%08x\"", algo.Iter()); + snprintf(sMemory, sizeof(sMemory), ",\"scratchpad\":\"0x%08x\"", (uint32_t)algo.Mem()); + snprintf(sMemAlignBytes, sizeof(sMemAlignBytes), ",\"mask\":\"0x%08x\"", algo.Mask()); } bin2hex((unsigned char*)&iNonce, 4, sNonce); @@ -682,8 +691,8 @@ bool jpsock::cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bRes bin2hex(bResult, 32, sResult); sResult[64] = '\0'; - snprintf(cmd_buffer, sizeof(cmd_buffer), "{\"method\":\"submit\",\"params\":{\"id\":\"%s\",\"job_id\":\"%s\",\"nonce\":\"%s\",\"result\":\"%s\"%s%s%s},\"id\":1}\n", - sMinerId, sJobId, sNonce, sResult, sBackend, sHashcount, sAlgo); + snprintf(cmd_buffer, sizeof(cmd_buffer), "{\"method\":\"submit\",\"params\":{\"id\":\"%s\",\"job_id\":\"%s\",\"nonce\":\"%s\",\"result\":\"%s\"%s%s%s%s%s%s%s},\"id\":1}\n", + sMinerId, sJobId, sNonce, sResult, sBackend, sHashcount, sAlgo, sBaseAlgo, sIterations,sMemory, sMemAlignBytes); uint64_t messageId = 0; opq_json_val oResult(nullptr); diff --git a/xmrstak/net/jpsock.hpp b/xmrstak/net/jpsock.hpp index 96fec6b..a1112df 100644 --- a/xmrstak/net/jpsock.hpp +++ b/xmrstak/net/jpsock.hpp @@ -35,7 +35,7 @@ public: void disconnect(bool quiet = false); bool cmd_login(); - bool cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bResult, const char* backend_name, uint64_t backend_hashcount, uint64_t total_hashcount, xmrstak_algo algo); + bool cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bResult, const char* backend_name, uint64_t backend_hashcount, uint64_t total_hashcount, const xmrstak_algo& algo); static bool hex2bin(const char* in, unsigned int len, unsigned char* out); static void bin2hex(const unsigned char* in, unsigned int len, char* out); diff --git a/xmrstak/net/msgstruct.hpp b/xmrstak/net/msgstruct.hpp index 6a05eb9..cd23a94 100644 --- a/xmrstak/net/msgstruct.hpp +++ b/xmrstak/net/msgstruct.hpp @@ -33,10 +33,10 @@ struct job_result char sJobID[64]; uint32_t iNonce; uint32_t iThreadId; - xmrstak_algo algorithm = invalid_algo; + xmrstak_algo algorithm = {invalid_algo}; job_result() {} - job_result(const char* sJobID, uint32_t iNonce, const uint8_t* bResult, uint32_t iThreadId, xmrstak_algo algo) : + job_result(const char* sJobID, uint32_t iNonce, const uint8_t* bResult, uint32_t iThreadId, const xmrstak_algo& algo) : iNonce(iNonce), iThreadId(iThreadId), algorithm(algo) { memcpy(this->sJobID, sJobID, sizeof(job_result::sJobID)); -- GitLab