diff --git a/CMakeLists.txt b/CMakeLists.txt index 7d21fa928c8c71f5a5ead048e450f2553d006309..09ff7aef519146966af9b4a56e19930587e82c52 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,13 @@ endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) # help to find cuda on systems with a software module system list(APPEND CMAKE_PREFIX_PATH "$ENV{CUDA_ROOT}") + +# help to find AMD OCL SDK Light (replaced APP SDK) +list(APPEND CMAKE_PREFIX_PATH "$ENV{OCL_ROOT}") + +# help to find AMD app SDK on systems with a software module system +list(APPEND CMAKE_PREFIX_PATH "$ENV{AMDAPPSDKROOT}") + # allow user to extent CMAKE_PREFIX_PATH via environment variable list(APPEND CMAKE_PREFIX_PATH "$ENV{CMAKE_PREFIX_PATH}") @@ -213,11 +220,6 @@ else() add_definitions("-DCONF_NO_CUDA") endif() -# help to find AMD app SDK on systems with a software module system -list(APPEND CMAKE_PREFIX_PATH "$ENV{AMDAPPSDKROOT}") -# allow user to extent CMAKE_PREFIX_PATH via environment variable -list(APPEND CMAKE_PREFIX_PATH "$ENV{CMAKE_PREFIX_PATH}") - ############################################################################### # Find OpenCL ############################################################################### @@ -231,6 +233,7 @@ if(OpenCL_ENABLE) OpenCL/cl.h NO_DEFAULT_PATH PATHS + ENV "OCL_ROOT" ENV "OpenCL_ROOT" ENV AMDAPPSDKROOT ENV ATISTREAMSDKROOT @@ -247,6 +250,7 @@ if(OpenCL_ENABLE) OpenCL.lib NO_DEFAULT_PATH PATHS + ENV "OCL_ROOT" ENV "OpenCL_ROOT" ENV AMDAPPSDKROOT ENV ATISTREAMSDKROOT diff --git a/README.md b/README.md index d19b1a77d692247a1bd1fbbda1969bfa615ac9a2..ff87dcead5dbcd0a00e85d8317172aefd8b6bccd 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ Besides [Monero](https://getmonero.org), following coins can be mined using this - [Aeon](http://www.aeon.cash) - [BBSCoin](https://www.bbscoin.xyz) - [BitTube](https://coin.bit.tube/) +- [Conceal](https://conceal.network) - [Graft](https://www.graft.network) - [Haven](https://havenprotocol.com) - [Lethean](https://lethean.io) diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index eac4dadb089ce5313b2513c3d682a1bb8d6cceb1..2ca09c31c79ee2132ad6caf9b44f3df86f2a4fc9 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -29,7 +29,7 @@ R"===( #define cryptonight_monero_v8 11 #define cryptonight_superfast 12 #define cryptonight_gpu 13 -#define cryptonight_turtle 14 +#define cryptonight_conceal 14 /* For Mesa clover support */ #ifdef cl_clang_storage_class_specifiers @@ -365,6 +365,69 @@ XMRSTAK_INCLUDE_BLAKE256 //#include "opencl/groestl256.cl" XMRSTAK_INCLUDE_GROESTL256 +inline float4 _mm_add_ps(float4 a, float4 b) +{ + return a + b; +} + +inline float4 _mm_sub_ps(float4 a, float4 b) +{ + return a - b; +} + +inline float4 _mm_mul_ps(float4 a, float4 b) +{ + + //#pragma OPENCL SELECT_ROUNDING_MODE rte + return a * b; +} + +inline float4 _mm_div_ps(float4 a, float4 b) +{ + return a / b; +} + +inline float4 _mm_and_ps(float4 a, int b) +{ + return as_float4(as_int4(a) & (int4)(b)); +} + +inline float4 _mm_or_ps(float4 a, int b) +{ + return as_float4(as_int4(a) | (int4)(b)); +} + +inline float4 _mm_fmod_ps(float4 v, float dc) +{ + float4 d = (float4)(dc); + float4 c = _mm_div_ps(v, d); + c = trunc(c); + c = _mm_mul_ps(c, d); + return _mm_sub_ps(v, c); +} + +inline int4 _mm_xor_si128(int4 a, int4 b) +{ + return a ^ b; +} + +inline float4 _mm_xor_ps(float4 a, int b) +{ + return as_float4(as_int4(a) ^ (int4)(b)); +} + +inline int4 _mm_alignr_epi8(int4 a, const uint rot) +{ + const uint right = 8 * rot; + const uint left = (32 - 8 * rot); + return (int4)( + ((uint)a.x >> right) | ( a.y << left ), + ((uint)a.y >> right) | ( a.z << left ), + ((uint)a.z >> right) | ( a.w << left ), + ((uint)a.w >> right) | ( a.x << left ) + ); +} + #if (ALGO == cryptonight_gpu) //#include "opencl/cryptonight_gpu.cl" XMRSTAK_INCLUDE_CN_GPU @@ -592,6 +655,9 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states ) { ulong a[2]; +#if(ALGO == cryptonight_conceal) + float4 conc_var = (float4)(0.0f); +#endif #if(ALGO == cryptonight_monero_v8) ulong b[4]; @@ -696,6 +762,21 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states ((uint4 *)c)[0] = SCRATCHPAD_CHUNK(0); +#if(ALGO == cryptonight_conceal) + float4 r = convert_float4_rte(((int4 *)c)[0]); + float4 c_old = conc_var; + r = _mm_add_ps(r, conc_var); + r = _mm_mul_ps(r, _mm_mul_ps(r, r)); + r = _mm_and_ps(r, 0x807FFFFF); + r = _mm_or_ps(r, 0x40000000); + conc_var = _mm_add_ps(conc_var, r); + + c_old = _mm_and_ps(c_old, 0x807FFFFF); + c_old = _mm_or_ps(c_old, 0x40000000); + float4 nc = _mm_mul_ps(c_old, (float4)(536870880.0f)); + ((int4 *)c)[0] ^= convert_int4_rte(nc); +#endif + #if(ALGO == cryptonight_bittube2) ((uint4 *)c)[0] = AES_Round2_bittube2(AES0, AES1, ~((uint4 *)c)[0], ((uint4 *)a)[0]); #else @@ -1116,7 +1197,6 @@ __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint sph_u64 h4h = 0x754D2E7F8996A371UL, h4l = 0x62E27DF70849141DUL, h5h = 0x948F2476F7957627UL, h5l = 0x6C29804757B6D587UL, h6h = 0x6C0D8EAC2D275E5CUL, h6l = 0x0F7A0557C6508451UL, h7h = 0xEA12247067D3E47BUL, h7l = 0x69D71CD313ABE389UL; sph_u64 tmp; - #pragma unroll 1 for(uint i = 0; i < 3; ++i) { ulong input[8]; @@ -1169,7 +1249,6 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u ((uint8 *)h)[0] = vload8(0U, c_IV256); - #pragma unroll 1 for (uint i = 0; i < 3; ++i) { ((uint16 *)m)[0] = vload16(i, (__global uint *)states); @@ -1267,7 +1346,11 @@ __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global states += 25 * BranchBuf[idx]; ulong State[8] = { 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0x0001000000000000UL }; - volatile ulong H[8], M[8]; +#if defined(__clang__) && !defined(__NV_CL_C_VERSION) + // on ROCM we need volatile for AMD RX5xx cards to avoid invalid shares + volatile +#endif + ulong H[8], M[8]; for (uint i = 0; i < 3; ++i) { ((ulong8 *)M)[0] = vload8(i, states); diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight_gpu.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight_gpu.cl index 2fb794d86d8a9bd40cc2a69d747d37a540395765..e87819760b7b62ab4fa97e62669803ad298a0407 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight_gpu.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight_gpu.cl @@ -1,68 +1,5 @@ R"===( - -inline float4 _mm_add_ps(float4 a, float4 b) -{ - return a + b; -} - -inline float4 _mm_sub_ps(float4 a, float4 b) -{ - return a - b; -} - -inline float4 _mm_mul_ps(float4 a, float4 b) -{ - return a * b; -} - -inline float4 _mm_div_ps(float4 a, float4 b) -{ - return a / b; -} - -inline float4 _mm_and_ps(float4 a, int b) -{ - return as_float4(as_int4(a) & (int4)(b)); -} - -inline float4 _mm_or_ps(float4 a, int b) -{ - return as_float4(as_int4(a) | (int4)(b)); -} - -inline float4 _mm_fmod_ps(float4 v, float dc) -{ - float4 d = (float4)(dc); - float4 c = _mm_div_ps(v, d); - c = trunc(c); - c = _mm_mul_ps(c, d); - return _mm_sub_ps(v, c); -} - -inline int4 _mm_xor_si128(int4 a, int4 b) -{ - return a ^ b; -} - -inline float4 _mm_xor_ps(float4 a, int b) -{ - return as_float4(as_int4(a) ^ (int4)(b)); -} - -inline int4 _mm_alignr_epi8(int4 a, const uint rot) -{ - const uint right = 8 * rot; - const uint left = (32 - 8 * rot); - return (int4)( - ((uint)a.x >> right) | ( a.y << left ), - ((uint)a.y >> right) | ( a.z << left ), - ((uint)a.z >> right) | ( a.w << left ), - ((uint)a.w >> right) | ( a.x << left ) - ); -} - - inline global int4* scratchpad_ptr(uint idx, uint n, __global int *lpad) { return (__global int4*)((__global char*)lpad + (idx & MASK) + n * 16); } inline float4 fma_break(float4 x) diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 7ba9e2fe89ea20829084c0ee7234896867ca06a4..dc378e88a56d537e4db998f55b24b061e3965277 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -585,6 +585,36 @@ inline void set_float_rounding_mode() #endif } +inline void set_float_rounding_mode_conceal() +{ +#ifdef _MSC_VER + _control87(RC_NEAR, MCW_RC); +#else + std::fesetround(FE_TONEAREST); +#endif +} + +inline __m128 _mm_set1_ps_epi32(uint32_t x) +{ + return _mm_castsi128_ps(_mm_set1_epi32(x)); +} + +inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var) +{ + __m128 r = _mm_cvtepi32_ps(cx); + __m128 c_old = conc_var; + r = _mm_add_ps(r, conc_var); + r = _mm_mul_ps(r, _mm_mul_ps(r, r)); + r = _mm_and_ps(_mm_set1_ps_epi32(0x807FFFFF), r); + r = _mm_or_ps(_mm_set1_ps_epi32(0x40000000), r); + conc_var = _mm_add_ps(conc_var, r); + + c_old = _mm_and_ps(_mm_set1_ps_epi32(0x807FFFFF), c_old); + c_old = _mm_or_ps(_mm_set1_ps_epi32(0x40000000), c_old); + __m128 nc = _mm_mul_ps(c_old, _mm_set1_ps(536870880.0f)); + cx = _mm_xor_si128(cx, _mm_cvttps_epi32(nc)); +} + #define CN_MONERO_V8_SHUFFLE_0(n, l0, idx0, ax0, bx0, bx1) \ /* Shuffle the other 3x16 byte chunks in the current 64-byte cache line */ \ if(ALGO == cryptonight_monero_v8) \ @@ -644,7 +674,7 @@ inline void set_float_rounding_mode() return; \ } -#define CN_INIT(n, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm) \ +#define CN_INIT(n, monero_const, conc_var, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm) \ keccak((const uint8_t *)input + len * n, len, ctx[n]->hash_state, 200); \ uint64_t monero_const; \ if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) \ @@ -662,6 +692,12 @@ inline void set_float_rounding_mode() /* BEGIN cryptonight_monero_v8 variables */ \ __m128i bx1; \ __m128i division_result_xmm; \ + __m128 conc_var; \ + if(ALGO == cryptonight_conceal) \ + {\ + set_float_rounding_mode_conceal(); \ + conc_var = _mm_setzero_ps(); \ + }\ GetOptimalSqrtType_t<N> sqrt_result; \ /* END cryptonight_monero_v8 variables */ \ { \ @@ -679,10 +715,12 @@ inline void set_float_rounding_mode() } \ __m128i *ptr0 -#define CN_STEP1(n, monero_const, l0, ax0, bx0, idx0, ptr0, cx, bx1) \ +#define CN_STEP1(n, monero_const, conc_var, l0, ax0, bx0, idx0, ptr0, cx, bx1) \ __m128i cx; \ ptr0 = (__m128i *)&l0[idx0 & MASK]; \ cx = _mm_load_si128(ptr0); \ + if (ALGO == cryptonight_conceal) \ + cryptonight_conceal_tweak(cx, conc_var); \ if (ALGO == cryptonight_bittube2) \ { \ cx = aes_round_bittube2(cx, ax0); \ @@ -848,12 +886,12 @@ struct Cryptonight_hash<1> const size_t MEM = algo.Mem(); CN_INIT_SINGLE; - REPEAT_1(9, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm); + REPEAT_1(10, CN_INIT, monero_const, conc_var, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm); // Optim - 90% time boundary for(size_t i = 0; i < ITERATIONS; i++) { - REPEAT_1(8, CN_STEP1, monero_const, l0, ax0, bx0, idx0, ptr0, cx, bx1); + REPEAT_1(9, CN_STEP1, monero_const, conc_var, l0, ax0, bx0, idx0, ptr0, cx, bx1); REPEAT_1(7, CN_STEP2, monero_const, l0, ax0, bx0, idx0, ptr0, cx); REPEAT_1(15, CN_STEP3, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0, cx, bx1, sqrt_result, division_result_xmm); REPEAT_1(11, CN_STEP4, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0); @@ -877,12 +915,12 @@ struct Cryptonight_hash<2> const size_t MEM = algo.Mem(); CN_INIT_SINGLE; - REPEAT_2(9, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm); + REPEAT_2(10, CN_INIT, monero_const, conc_var, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm); // Optim - 90% time boundary for(size_t i = 0; i < ITERATIONS; i++) { - REPEAT_2(8, CN_STEP1, monero_const, l0, ax0, bx0, idx0, ptr0, cx, bx1); + REPEAT_2(9, CN_STEP1, monero_const, conc_var, l0, ax0, bx0, idx0, ptr0, cx, bx1); REPEAT_2(7, CN_STEP2, monero_const, l0, ax0, bx0, idx0, ptr0, cx); REPEAT_2(15, CN_STEP3, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0, cx, bx1, sqrt_result, division_result_xmm); REPEAT_2(11, CN_STEP4, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0); @@ -906,12 +944,12 @@ struct Cryptonight_hash<3> const size_t MEM = algo.Mem(); CN_INIT_SINGLE; - REPEAT_3(9, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm); + REPEAT_3(10, CN_INIT, monero_const, conc_var, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm); // Optim - 90% time boundary for(size_t i = 0; i < ITERATIONS; i++) { - REPEAT_3(8, CN_STEP1, monero_const, l0, ax0, bx0, idx0, ptr0, cx, bx1); + REPEAT_3(9, CN_STEP1, monero_const, conc_var, l0, ax0, bx0, idx0, ptr0, cx, bx1); REPEAT_3(7, CN_STEP2, monero_const, l0, ax0, bx0, idx0, ptr0, cx); REPEAT_3(15, CN_STEP3, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0, cx, bx1, sqrt_result, division_result_xmm); REPEAT_3(11, CN_STEP4, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0); @@ -935,12 +973,12 @@ struct Cryptonight_hash<4> const size_t MEM = algo.Mem(); CN_INIT_SINGLE; - REPEAT_4(9, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm); + REPEAT_4(10, CN_INIT, monero_const, conc_var, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm); // Optim - 90% time boundary for(size_t i = 0; i < ITERATIONS; i++) { - REPEAT_4(8, CN_STEP1, monero_const, l0, ax0, bx0, idx0, ptr0, cx, bx1); + REPEAT_4(9, CN_STEP1, monero_const, conc_var, l0, ax0, bx0, idx0, ptr0, cx, bx1); REPEAT_4(7, CN_STEP2, monero_const, l0, ax0, bx0, idx0, ptr0, cx); REPEAT_4(15, CN_STEP3, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0, cx, bx1, sqrt_result, division_result_xmm); REPEAT_4(11, CN_STEP4, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0); @@ -964,12 +1002,12 @@ struct Cryptonight_hash<5> const size_t MEM = algo.Mem(); CN_INIT_SINGLE; - REPEAT_5(9, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm); + REPEAT_5(10, CN_INIT, monero_const, conc_var, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm); // Optim - 90% time boundary for(size_t i = 0; i < ITERATIONS; i++) { - REPEAT_5(8, CN_STEP1, monero_const, l0, ax0, bx0, idx0, ptr0, cx, bx1); + REPEAT_5(9, CN_STEP1, monero_const, conc_var, l0, ax0, bx0, idx0, ptr0, cx, bx1); REPEAT_5(7, CN_STEP2, monero_const, l0, ax0, bx0, idx0, ptr0, cx); REPEAT_5(15, CN_STEP3, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0, cx, bx1, sqrt_result, division_result_xmm); REPEAT_5(11, CN_STEP4, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0); diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 440732210a450ad228dbea10177b7fdbe9bd5f2c..50507f2aee1e0e3bcce67eae76cd7c1f8f8f293b 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -407,6 +407,16 @@ bool minethd::self_test() hashf("", 0, out, ctx, algo); bResult = bResult && memcmp(out, "\x55\x5e\x0a\xee\x78\x79\x31\x6d\x7d\xef\xf7\x72\x97\x3c\xb9\x11\x8e\x38\x95\x70\x9d\xb2\x54\x7a\xc0\x72\xd5\xb9\x13\x10\x01\xd8", 32) == 0; } + else if(algo == POW(cryptonight_conceal)) + { + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf("", 0, out, ctx, algo); + bResult = bResult && memcmp(out, "\xb5\x54\x4b\x58\x16\x70\x26\x47\x63\x47\xe4\x1f\xb6\x5e\x57\xc9\x7c\xa5\x93\xfe\x0e\xb1\x0f\xb9\x2f\xa7\x3e\x5b\xae\xef\x79\x8c", 32) == 0; + + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); + hashf("", 0, out, ctx, algo); + bResult = bResult && memcmp(out, "\xb5\x54\x4b\x58\x16\x70\x26\x47\x63\x47\xe4\x1f\xb6\x5e\x57\xc9\x7c\xa5\x93\xfe\x0e\xb1\x0f\xb9\x2f\xa7\x3e\x5b\xae\xef\x79\x8c", 32) == 0; + } else if (algo == POW(cryptonight_turtle)) { hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); @@ -551,6 +561,9 @@ minethd::cn_hash_fun minethd::func_multi_selector(bool bHaveAes, bool bNoPrefetc case cryptonight_gpu: algv = 12; break; + case cryptonight_conceal: + algv = 13; + break; default: algv = 2; break; @@ -620,7 +633,12 @@ minethd::cn_hash_fun minethd::func_multi_selector(bool bHaveAes, bool bNoPrefetc Cryptonight_hash_gpu::template hash<cryptonight_gpu, false, false>, Cryptonight_hash_gpu::template hash<cryptonight_gpu, true, false>, Cryptonight_hash_gpu::template hash<cryptonight_gpu, false, true>, - Cryptonight_hash_gpu::template hash<cryptonight_gpu, true, true> + Cryptonight_hash_gpu::template hash<cryptonight_gpu, true, true>, + + Cryptonight_hash<N>::template hash<cryptonight_conceal, false, false>, + Cryptonight_hash<N>::template hash<cryptonight_conceal, true, false>, + Cryptonight_hash<N>::template hash<cryptonight_conceal, false, true>, + Cryptonight_hash<N>::template hash<cryptonight_conceal, true, true> }; std::bitset<2> digit; diff --git a/xmrstak/backend/cryptonight.hpp b/xmrstak/backend/cryptonight.hpp index b75adf217eb180f6edf15d68ce5bfdb403f285ab..db07877897e8e6bd8ff78eb770056351fc9caa98 100644 --- a/xmrstak/backend/cryptonight.hpp +++ b/xmrstak/backend/cryptonight.hpp @@ -23,6 +23,7 @@ enum xmrstak_algo_id cryptonight_monero_v8 = 11, cryptonight_superfast = 12, cryptonight_gpu = 13, + cryptonight_conceal = 14, cryptonight_turtle = start_derived_algo_id, cryptonight_v8_half = (start_derived_algo_id + 1), @@ -36,7 +37,7 @@ enum xmrstak_algo_id */ inline std::string get_algo_name(xmrstak_algo_id algo_id) { - static std::array<std::string, 14> base_algo_names = + static std::array<std::string, 15> base_algo_names = {{ "invalid_algo", "cryptonight", @@ -51,7 +52,8 @@ inline std::string get_algo_name(xmrstak_algo_id algo_id) "cryptonight_bittube2", "cryptonight_v8", "cryptonight_superfast", - "cryptonight_gpu" + "cryptonight_gpu", + "cryptonight_conceal" }}; static std::array<std::string, 3> derived_algo_names = @@ -168,11 +170,11 @@ constexpr uint32_t CN_GPU_ITER = 0xC000; // cryptonight turtle (the mask is not using the full 256kib scratchpad) constexpr uint32_t CN_TURTLE_MASK = 0x1FFF0; -constexpr uint32_t CN_ZELERIUS_ITER = 0x6000; +constexpr uint32_t CN_ZELERIUS_ITER = 0x60000; inline xmrstak_algo POW(xmrstak_algo_id algo_id) { - static std::array<xmrstak_algo, 14> pow = {{ + static std::array<xmrstak_algo, 15> pow = {{ {invalid_algo, invalid_algo}, {cryptonight, cryptonight, CN_ITER, CN_MEMORY}, {cryptonight_lite, cryptonight_lite, CN_ITER/2, CN_MEMORY/2}, @@ -186,7 +188,8 @@ inline xmrstak_algo POW(xmrstak_algo_id algo_id) {cryptonight_bittube2, cryptonight_bittube2, CN_ITER/2, CN_MEMORY*2}, // derived from cryptonight_heavy with own aes-round implementation and minor other tweaks {cryptonight_monero_v8, cryptonight_monero_v8, CN_ITER, CN_MEMORY}, {cryptonight_superfast, cryptonight_superfast, CN_ITER/4, CN_MEMORY}, - {cryptonight_gpu, cryptonight_gpu, CN_GPU_ITER, CN_MEMORY, CN_GPU_MASK} + {cryptonight_gpu, cryptonight_gpu, CN_GPU_ITER, CN_MEMORY, CN_GPU_MASK}, + {cryptonight_conceal, cryptonight_conceal, CN_ITER/2, CN_MEMORY} }}; static std::array<xmrstak_algo, 3> derived_pow = diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu index e151e8c02ec12779552752d1e7cf21a68a0131d1..184825222ce86cd94d57dd2b1747463586edbf77 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu @@ -523,6 +523,15 @@ __global__ void cryptonight_core_gpu_phase2_quad( uint32_t a, d[2], idx0; uint32_t t1[2], t2[2], res; + float conc_var; + if(ALGO == cryptonight_conceal) + { + if(partidx != 0) + conc_var = int_as_float(*(d_ctx_b + threads * 4 + thread * 4 + sub)); + else + conc_var = 0.0f; + } + uint32_t tweak1_2[2]; if (ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) { @@ -585,7 +594,23 @@ __global__ void cryptonight_core_gpu_phase2_quad( } else { - const uint32_t x_0 = loadGlobal32<uint32_t>( long_state + j ); + uint32_t x_0 = loadGlobal32<uint32_t>( long_state + j ); + + if(ALGO == cryptonight_conceal) + { + float r = int2float((int32_t)x_0); + float c_old = conc_var; + + r += conc_var; + r = r * r * r; + r = int_as_float((float_as_int(r) & 0x807FFFFF) | 0x40000000); + conc_var += r; + + c_old = int_as_float((float_as_int(c_old) & 0x807FFFFF) | 0x40000000); + c_old *= 536870880.0f; + x_0 = (uint32_t)(((int32_t)x_0) ^ ((int32_t)c_old)); + } + const uint32_t x_1 = shuffle<4>(sPtr,sub, x_0, sub + 1); const uint32_t x_2 = shuffle<4>(sPtr,sub, x_0, sub + 2); const uint32_t x_3 = shuffle<4>(sPtr,sub, x_0, sub + 3); @@ -687,6 +712,8 @@ __global__ void cryptonight_core_gpu_phase2_quad( if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2 || ALGO == cryptonight_superfast) if(sub&1) *(d_ctx_b + threads * 4 + thread) = idx0; + if(ALGO == cryptonight_conceal) + *(d_ctx_b + threads * 4 + thread * 4 + sub) = float_as_int(conc_var); } } @@ -989,7 +1016,10 @@ void cryptonight_core_cpu_hash(nvid_ctx* ctx, const xmrstak_algo& miner_algo, ui cryptonight_core_gpu_hash<cryptonight_superfast, 1>, cryptonight_core_gpu_hash_gpu<cryptonight_gpu, 0>, - cryptonight_core_gpu_hash_gpu<cryptonight_gpu, 1> + cryptonight_core_gpu_hash_gpu<cryptonight_gpu, 1>, + + cryptonight_core_gpu_hash<cryptonight_conceal, 0>, + cryptonight_core_gpu_hash<cryptonight_conceal, 1> }; std::bitset<1> digit; diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index e20373b7d82216c0d2d8534b24ba560a0efa46a4..43e21fb428ce9d7772a740f0247dad596ce00b17 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -310,6 +310,10 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) // create a double buffer for the state to exchange the mixed state to phase1 CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state2, 50 * sizeof(uint32_t) * wsize)); } + else if(std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_conceal) != neededAlgorithms.end()) + { + ctx_b_size += sizeof(uint32_t) * 4 * wsize; + } else if(std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_monero_v8) != neededAlgorithms.end()) { // bx1 (16byte), division_result (8byte) and sqrt_result (8byte) diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp index 41e077889cb032264ac5d754838d7efe0babf332..5dbddb09b4c3c3c47b8d4c7d10190c3cfe565c85 100644 --- a/xmrstak/jconf.cpp +++ b/xmrstak/jconf.cpp @@ -105,7 +105,8 @@ xmrstak::coin_selection coins[] = { { "cryptonight_v8_half", {POW(cryptonight_v8_half)}, {POW(cryptonight_monero_v8)}, nullptr }, { "cryptonight_v8_zelerius", {POW(cryptonight_v8_zelerius)},{POW(cryptonight_monero_v8)}, nullptr }, { "cryptonight_v7_stellite", {POW(cryptonight_stellite)}, {POW(cryptonight_monero_v8)}, nullptr }, - { "cryptonight_gpu", {POW(cryptonight_gpu)}, {POW(cryptonight_gpu)}, nullptr }, + { "cryptonight_gpu", {POW(cryptonight_gpu)}, {POW(cryptonight_gpu)}, "pool.ryo-currency.com:3333" }, + { "cryptonight_conceal", {POW(cryptonight_conceal)}, {POW(cryptonight_monero_v8)}, nullptr }, { "freehaven", {POW(cryptonight_superfast)}, {POW(cryptonight_monero_v8)}, nullptr }, { "graft", {POW(cryptonight_monero_v8)}, {POW(cryptonight_monero_v8)}, nullptr }, { "haven", {POW(cryptonight_haven)}, {POW(cryptonight_heavy)}, nullptr }, @@ -113,7 +114,7 @@ xmrstak::coin_selection coins[] = { { "masari", {POW(cryptonight_v8_half)}, {POW(cryptonight_monero_v8)}, nullptr }, { "monero", {POW(cryptonight_monero_v8)}, {POW(cryptonight_monero_v8)}, "pool.usxmrpool.com:3333" }, { "qrl", {POW(cryptonight_monero)}, {POW(cryptonight_monero_v8)}, nullptr }, - { "ryo", {POW(cryptonight_gpu), 6u, POW(cryptonight_heavy)}, {POW(cryptonight_gpu), 6u, POW(cryptonight_heavy)}, nullptr }, + { "ryo", {POW(cryptonight_gpu)}, {POW(cryptonight_gpu)}, "pool.ryo-currency.com:3333" }, { "stellite", {POW(cryptonight_v8_half)}, {POW(cryptonight_monero_v8)}, nullptr }, { "turtlecoin", {POW(cryptonight_turtle), 6u,POW(cryptonight_aeon)}, {POW(cryptonight_aeon)}, nullptr }, { "plenteum", {POW(cryptonight_turtle)}, {POW(cryptonight_turtle)}, nullptr }, diff --git a/xmrstak/version.cpp b/xmrstak/version.cpp index f5a51ab250ea091fbf3571e02dfc73aec98ae633..a118989ccaa7aad10f3d8dee4444c04cbf57a50d 100644 --- a/xmrstak/version.cpp +++ b/xmrstak/version.cpp @@ -18,7 +18,7 @@ #endif #define XMR_STAK_NAME "xmr-stak" -#define XMR_STAK_VERSION "2.8.2" +#define XMR_STAK_VERSION "2.8.3" #if defined(_WIN32) #define OS_TYPE "win"