diff --git a/README.md b/README.md index 6b7755f3c46614fa34cc6e597afec42fabeddea7..887bc5cf30996e0f0616b246a3cf6ada707b7328 100644 --- a/README.md +++ b/README.md @@ -40,10 +40,10 @@ Besides [Monero](https://getmonero.org), following coins can be mined using this - [Aeon](http://www.aeon.cash) - [BBSCoin](https://www.bbscoin.xyz) +- [BitTube](https://coin.bit.tube/) - [Graft](https://www.graft.network) - [Haven](https://havenprotocol.com) - [Intense](https://intensecoin.com) -- [IPBC](https://ipbc.io) - [Masari](https://getmasari.org) - [Ryo](https://ryo-currency.com) - [TurtleCoin](https://turtlecoin.lol) diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 596ee2127679f07da3fbf8fdd3731e5d0084d7d4..87721ac8f606a6267c20dc1993554a3958893b79 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -1004,7 +1004,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar return(ERR_OCL_API); } - if(miner_algo == cryptonight_monero || miner_algo == cryptonight_aeon || miner_algo == cryptonight_ipbc || miner_algo == cryptonight_stellite || miner_algo == cryptonight_masari) + if(miner_algo == cryptonight_monero || miner_algo == cryptonight_aeon || miner_algo == cryptonight_ipbc || miner_algo == cryptonight_stellite || miner_algo == cryptonight_masari || miner_algo == cryptonight_bittube2) { // Input if ((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 3, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS) diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index ed4bebbcc7d8e041d6c8ebe686a73c47f75fea1f..002472d3aa7e59f092619bfe1532c510670cd07a 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -513,8 +513,8 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad, mem_fence(CLK_LOCAL_MEM_FENCE); -// 
cryptonight_heavy or cryptonight_haven -#if (ALGO == 4 || ALGO == 9) +// cryptonight_heavy || cryptonight_haven || cryptonight_bittube2 +#if (ALGO == 4 || ALGO == 9 || ALGO == 10) __local uint4 xin[8][WORKSIZE]; /* Also left over threads perform this loop. @@ -551,11 +551,11 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad, } mem_fence(CLK_GLOBAL_MEM_FENCE); } - + __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states, ulong Threads -// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite || cryptonight_masari -#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7 || ALGO == 8) +// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite || cryptonight_masari || cryptonight_bittube2 +#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7 || ALGO == 8 || ALGO == 10) , __global ulong *input #endif ) @@ -575,8 +575,8 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states } barrier(CLK_LOCAL_MEM_FENCE); -// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite || cryptonight_masari -#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7 || ALGO == 8) +// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite || cryptonight_masari || cryptonight_bittube2 +#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7 || ALGO == 8 || ALGO == 10) uint2 tweak1_2; #endif uint4 b_x; @@ -600,8 +600,8 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states b[1] = states[3] ^ states[7]; b_x = ((uint4 *)b)[0]; -// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite || cryptonight_masari -#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7 || ALGO == 8) +// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite || cryptonight_masari || cryptonight_bittube2 
+#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7 || ALGO == 8 || ALGO == 10) tweak1_2 = as_uint2(input[4]); tweak1_2.s0 >>= 24; tweak1_2.s0 |= tweak1_2.s1 << 8; @@ -625,11 +625,15 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states ulong c[2]; ((uint4 *)c)[0] = Scratchpad[IDX((idx0 & MASK) >> 4)]; +// cryptonight_bittube2 +#if(ALGO == 10) + ((uint4 *)c)[0] = AES_Round_bittube2(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]); +#else ((uint4 *)c)[0] = AES_Round(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]); - +#endif b_x ^= ((uint4 *)c)[0]; -// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite || cryptonight_masari -#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7 || ALGO == 8) +// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite || cryptonight_masari || cryptonight_bittube2 +#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7 || ALGO == 8 || ALGO == 10) uint table = 0x75310U; // cryptonight_stellite # if(ALGO == 7) @@ -647,10 +651,11 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states a[1] += c[0] * as_ulong2(tmp).s0; a[0] += mul_hi(c[0], as_ulong2(tmp).s0); -// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite || cryptonight_masari -#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7 || ALGO == 8) +// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite || cryptonight_masari || cryptonight_bittube2 +#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7 || ALGO == 8 || ALGO == 10) -# if(ALGO == 6) +// cryptonight_ipbc || cryptonight_bittube2 +# if(ALGO == 6 || ALGO == 10) uint2 ipbc_tmp = tweak1_2 ^ ((uint2 *)&(a[0]))[0]; ((uint2 *)&(a[1]))[0] ^= ipbc_tmp; Scratchpad[IDX((c[0] & MASK) >> 4)] = ((uint4 *)a)[0]; @@ -670,8 +675,8 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states b_x = ((uint4 *)c)[0]; -// 
cryptonight_heavy -#if (ALGO == 4) +// cryptonight_heavy || cryptonight_bittube2 +#if (ALGO == 4 || ALGO == 10) long n = *((__global long*)(Scratchpad + (IDX((idx0 & MASK) >> 4)))); int d = ((__global int*)(Scratchpad + (IDX((idx0 & MASK) >> 4))))[2]; long q = n / (d | 0x5); @@ -744,8 +749,8 @@ __kernel void JOIN(cn2,ALGO) (__global uint4 *Scratchpad, __global ulong *states } barrier(CLK_LOCAL_MEM_FENCE); -// cryptonight_heavy or cryptonight_haven -#if (ALGO == 4 || ALGO == 9) +// cryptonight_heavy || cryptonight_haven || cryptonight_bittube2 +#if (ALGO == 4 || ALGO == 9 || ALGO == 10) __local uint4 xin[8][WORKSIZE]; #endif @@ -754,8 +759,8 @@ __kernel void JOIN(cn2,ALGO) (__global uint4 *Scratchpad, __global ulong *states if(gIdx < Threads) #endif { -// cryptonight_heavy or cryptonight_haven -#if (ALGO == 4 || ALGO == 9) +// cryptonight_heavy || cryptonight_haven || cryptonight_bittube2 +#if (ALGO == 4 || ALGO == 9 || ALGO == 10) #pragma unroll 2 for(int i = 0; i < (MEMORY >> 7); ++i) { @@ -801,8 +806,8 @@ __kernel void JOIN(cn2,ALGO) (__global uint4 *Scratchpad, __global ulong *states #endif } -// cryptonight_heavy or cryptonight_haven -#if (ALGO == 4 || ALGO == 9) +// cryptonight_heavy || cryptonight_haven || cryptonight_bittube2 +#if (ALGO == 4 || ALGO == 9 || ALGO == 10) /* Also left over threads perform this loop. 
* The left over thread results will be ignored */ diff --git a/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl b/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl index 81e1644f17886a95d37fac0a3af8bc7a8832258a..24ce4904fe1ad0ba9df047613c77eb9e92735bed 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl @@ -74,6 +74,19 @@ static const __constant uint AES0_C[256] = #define BYTE(x, y) (amd_bfe((x), (y) << 3U, 8U)) +inline uint4 AES_Round_bittube2(const __local uint *AES0, const __local uint *AES1, const __local uint *AES2, const __local uint *AES3, uint4 x, uint4 k) +{ + x = ~x; + k[0] ^= AES0[BYTE(x[0], 0)] ^ AES1[BYTE(x[1], 1)] ^ AES2[BYTE(x[2], 2)] ^ AES3[BYTE(x[3], 3)]; + x[0] ^= k[0]; + k[1] ^= AES0[BYTE(x[1], 0)] ^ AES1[BYTE(x[2], 1)] ^ AES2[BYTE(x[3], 2)] ^ AES3[BYTE(x[0], 3)]; + x[1] ^= k[1]; + k[2] ^= AES0[BYTE(x[2], 0)] ^ AES1[BYTE(x[3], 1)] ^ AES2[BYTE(x[0], 2)] ^ AES3[BYTE(x[1], 3)]; + x[2] ^= k[2]; + k[3] ^= AES0[BYTE(x[3], 0)] ^ AES1[BYTE(x[0], 1)] ^ AES2[BYTE(x[1], 2)] ^ AES3[BYTE(x[2], 3)]; + return k; +} + uint4 AES_Round(const __local uint *AES0, const __local uint *AES1, const __local uint *AES2, const __local uint *AES3, const uint4 X, uint4 key) { key.s0 ^= AES0[BYTE(X.s0, 0)]; diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index e61381aa45c3e725cb7cfbf137cdd04749f43a42..9f70bcfa709d2a8be889c30f3766a6a812114d82 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -180,7 +180,7 @@ void cn_explode_scratchpad(const __m128i* input, __m128i* output) xin6 = _mm_load_si128(input + 10); xin7 = _mm_load_si128(input + 11); - if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2) { for(size_t i=0; i < 16; i++) { @@ -324,11 +324,11 @@ void cn_implode_scratchpad(const __m128i* input, 
__m128i* output) aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); } - if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2) mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); } - if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2) { for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) { @@ -375,7 +375,7 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output) aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); } - if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2) mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); } @@ -422,6 +422,24 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output) _mm_store_si128(output + 11, xout7); } +inline __m128i aes_round_bittube2(const __m128i& val, const __m128i& key) +{ + alignas(16) uint32_t k[4]; + alignas(16) uint32_t x[4]; + _mm_store_si128((__m128i*)k, key); + _mm_store_si128((__m128i*)x, _mm_xor_si128(val, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))); // x = ~val + #define BYTE(p, i) ((unsigned char*)&p)[i] + k[0] ^= saes_table[0][BYTE(x[0], 0)] ^ saes_table[1][BYTE(x[1], 1)] ^ saes_table[2][BYTE(x[2], 2)] ^ saes_table[3][BYTE(x[3], 3)]; + x[0] ^= k[0]; + k[1] ^= saes_table[0][BYTE(x[1], 0)] ^ saes_table[1][BYTE(x[2], 1)] ^ saes_table[2][BYTE(x[3], 2)] ^ saes_table[3][BYTE(x[0], 3)]; + x[1] ^= k[1]; + k[2] ^= saes_table[0][BYTE(x[2], 0)] ^ saes_table[1][BYTE(x[3], 1)] ^ saes_table[2][BYTE(x[0], 2)] ^ saes_table[3][BYTE(x[1], 3)]; + x[2] ^= k[2]; + k[3] ^= saes_table[0][BYTE(x[3], 0)] ^ saes_table[1][BYTE(x[0], 1)] ^ saes_table[2][BYTE(x[1], 2)] ^ saes_table[3][BYTE(x[2], 3)]; + #undef BYTE 
+ return _mm_load_si128((__m128i*)k); +} + template<xmrstak_algo ALGO> inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp) { @@ -432,7 +450,7 @@ inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp) uint8_t x = static_cast<uint8_t>(vh >> 24); static const uint16_t table = 0x7531; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_masari) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) { const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1; vh ^= ((table >> index) & 0x3) << 28; @@ -456,7 +474,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); constexpr size_t MEM = cn_select_memory<ALGO>(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) && len < 43) { memset(output, 0, 32); return; @@ -465,7 +483,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c keccak((const uint8_t *)input, len, ctx0->hash_state, 200); uint64_t monero_const; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) { monero_const = *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35); monero_const ^= *(reinterpret_cast<const uint64_t*>(ctx0->hash_state) + 24); @@ -489,12 +507,19 @@ void 
cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c __m128i cx; cx = _mm_load_si128((__m128i *)&l0[idx0 & MASK]); - if(SOFT_AES) - cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0)); + if (ALGO == cryptonight_bittube2) + { + cx = aes_round_bittube2(cx, _mm_set_epi64x(ah0, al0)); + } else - cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); + { + if(SOFT_AES) + cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0)); + else + cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); + } - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) cryptonight_monero_tweak<ALGO>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); else _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); @@ -518,9 +543,8 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c _mm_prefetch((const char*)&l0[al0 & MASK], _MM_HINT_T0); ah0 += lo; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) - { - if(ALGO == cryptonight_ipbc) + if (ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) { + if (ALGO == cryptonight_ipbc || ALGO == cryptonight_bittube2) ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const ^ ((uint64_t*)&l0[idx0 & MASK])[0]; else ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const; @@ -531,7 +555,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c idx0 = al0; - if(ALGO == cryptonight_heavy) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_bittube2) { int64_t n = ((int64_t*)&l0[idx0 & 
MASK])[0]; int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; @@ -570,7 +594,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); constexpr size_t MEM = cn_select_memory<ALGO>(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) && len < 43) { memset(output, 0, 64); return; @@ -580,7 +604,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto keccak((const uint8_t *)input+len, len, ctx[1]->hash_state, 200); uint64_t monero_const_0, monero_const_1; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) { monero_const_0 = *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35); monero_const_0 ^= *(reinterpret_cast<const uint64_t*>(ctx[0]->hash_state) + 24); @@ -613,12 +637,19 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto __m128i cx; cx = _mm_load_si128((__m128i *)&l0[idx0 & MASK]); - if(SOFT_AES) - cx = soft_aesenc(cx, _mm_set_epi64x(axh0, axl0)); + if (ALGO == cryptonight_bittube2) + { + cx = aes_round_bittube2(cx, _mm_set_epi64x(axh0, axl0)); + } else - cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh0, axl0)); + { + if(SOFT_AES) + cx = soft_aesenc(cx, _mm_set_epi64x(axh0, axl0)); + else + cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh0, axl0)); + } - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == 
cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) cryptonight_monero_tweak<ALGO>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); else _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); @@ -631,12 +662,19 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto cx = _mm_load_si128((__m128i *)&l1[idx1 & MASK]); - if(SOFT_AES) - cx = soft_aesenc(cx, _mm_set_epi64x(axh1, axl1)); + if (ALGO == cryptonight_bittube2) + { + cx = aes_round_bittube2(cx, _mm_set_epi64x(axh1, axl1)); + } else - cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh1, axl1)); + { + if(SOFT_AES) + cx = soft_aesenc(cx, _mm_set_epi64x(axh1, axl1)); + else + cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh1, axl1)); + } - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) cryptonight_monero_tweak<ALGO>((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx)); else _mm_store_si128((__m128i *)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx)); @@ -657,21 +695,19 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto axh0 += lo; ((uint64_t*)&l0[idx0 & MASK])[0] = axl0; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) - { - if(ALGO == cryptonight_ipbc) + if (ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) { + if (ALGO == cryptonight_ipbc 
|| ALGO == cryptonight_bittube2) ((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0 ^ ((uint64_t*)&l0[idx0 & MASK])[0]; else ((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0; - } - else + } else ((uint64_t*)&l0[idx0 & MASK])[1] = axh0; axh0 ^= ch; axl0 ^= cl; idx0 = axl0; - if(ALGO == cryptonight_heavy) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_bittube2) { int64_t n = ((int64_t*)&l0[idx0 & MASK])[0]; int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; @@ -702,21 +738,19 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto axh1 += lo; ((uint64_t*)&l1[idx1 & MASK])[0] = axl1; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) - { - if(ALGO == cryptonight_ipbc) + if (ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) { + if (ALGO == cryptonight_ipbc || ALGO == cryptonight_bittube2) ((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1 ^ ((uint64_t*)&l1[idx1 & MASK])[0]; else ((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1; - } - else + } else ((uint64_t*)&l1[idx1 & MASK])[1] = axh1; axh1 ^= ch; axl1 ^= cl; idx1 = axl1; - if(ALGO == cryptonight_heavy) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_bittube2) { int64_t n = ((int64_t*)&l1[idx1 & MASK])[0]; int32_t d = ((int32_t*)&l1[idx1 & MASK])[2]; @@ -758,12 +792,19 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto c = _mm_load_si128(ptr); #define CN_STEP2(a, b, c, l, ptr, idx) \ - if(SOFT_AES) \ - c = soft_aesenc(c, a); \ - else \ - c = _mm_aesenc_si128(c, a); \ + if (ALGO == cryptonight_bittube2) \ + { \ + c = aes_round_bittube2(c, a); \ + } \ + else \ + { \ + if(SOFT_AES) \ + c = soft_aesenc(c, a); \ + else \ + c = _mm_aesenc_si128(c, a); \ + } \ b = _mm_xor_si128(b, c); \ - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == 
cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) \ + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) \ cryptonight_monero_tweak<ALGO>((uint64_t*)ptr, b); \ else \ _mm_store_si128(ptr, b);\ @@ -778,17 +819,17 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto #define CN_STEP4(a, b, c, l, mc, ptr, idx) \ lo = _umul128(idx, _mm_cvtsi128_si64(b), &hi); \ a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \ - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) \ + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) \ { \ _mm_store_si128(ptr, _mm_xor_si128(a, mc)); \ - if (ALGO == cryptonight_ipbc) \ + if (ALGO == cryptonight_ipbc || ALGO == cryptonight_bittube2) \ ((uint64_t*)ptr)[1] ^= ((uint64_t*)ptr)[0];\ } \ else \ _mm_store_si128(ptr, a);\ a = _mm_xor_si128(a, b); \ idx = _mm_cvtsi128_si64(a); \ - if(ALGO == cryptonight_heavy) \ + if(ALGO == cryptonight_heavy || ALGO == cryptonight_bittube2) \ { \ int64_t n = ((int64_t*)&l[idx & MASK])[0]; \ int32_t d = ((int32_t*)&l[idx & MASK])[2]; \ @@ -817,7 +858,7 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); constexpr size_t MEM = cn_select_memory<ALGO>(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) && len < 43) { 
memset(output, 0, 32 * 3); return; @@ -911,7 +952,7 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); constexpr size_t MEM = cn_select_memory<ALGO>(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) && len < 43) { memset(output, 0, 32 * 4); return; @@ -1020,7 +1061,7 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton constexpr size_t ITERATIONS = cn_select_iter<ALGO>(); constexpr size_t MEM = cn_select_memory<ALGO>(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) && len < 43) { memset(output, 0, 32 * 5); return; diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 02224697f850f92cf6b60c6272314ca13cba64a9..2e7169ef7c54c852894e52e75d75c6b6d4a08aed 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -25,7 +25,7 @@ #include "xmrstak/misc/console.hpp" #include "xmrstak/backend/iBackend.hpp" -#include "xmrstak/backend//globalStates.hpp" +#include "xmrstak/backend/globalStates.hpp" #include "xmrstak/misc/configEditor.hpp" #include "xmrstak/params.hpp" #include "jconf.hpp" @@ -295,6 +295,22 @@ bool minethd::self_test() else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_masari) { } + else 
if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_bittube2) + { + unsigned char out[32 * MAX_N]; + cn_hash_fun hashf; + + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_bittube2); + + hashf("\x38\x27\x4c\x97\xc4\x5a\x17\x2c\xfc\x97\x67\x98\x70\x42\x2e\x3a\x1a\xb0\x78\x49\x60\xc6\x05\x14\xd8\x16\x27\x14\x15\xc3\x06\xee\x3a\x3e\xd1\xa7\x7e\x31\xf6\xa8\x85\xc3\xcb\xff\x01\x02\x03\x04", 48, out, ctx[0]); + bResult = memcmp(out, "\x18\x2c\x30\x41\x93\x1a\x14\x73\xc6\xbf\x7e\x77\xfe\xb5\x17\x9b\xa8\xbe\xa9\x68\xba\x9e\xe1\xe8\x24\x1a\x12\x7a\xac\x81\xb4\x24", 32) == 0; + + hashf("\x04\x04\xb4\x94\xce\xd9\x05\x18\xe7\x25\x5d\x01\x28\x63\xde\x8a\x4d\x27\x72\xb1\xff\x78\x8c\xd0\x56\x20\x38\x98\x3e\xd6\x8c\x94\xea\x00\xfe\x43\x66\x68\x83\x00\x00\x00\x00\x18\x7c\x2e\x0f\x66\xf5\x6b\xb9\xef\x67\xed\x35\x14\x5c\x69\xd4\x69\x0d\x1f\x98\x22\x44\x01\x2b\xea\x69\x6e\xe8\xb3\x3c\x42\x12\x01", 76, out, ctx[0]); + bResult = bResult && memcmp(out, "\x7f\xbe\xb9\x92\x76\x87\x5a\x3c\x43\xc2\xbe\x5a\x73\x36\x06\xb5\xdc\x79\xcc\x9c\xf3\x7c\x43\x3e\xb4\x18\x56\x17\xfb\x9b\xc9\x36", 32) == 0; + + hashf("\x85\x19\xe0\x39\x17\x2b\x0d\x70\xe5\xca\x7b\x33\x83\xd6\xb3\x16\x73\x15\xa4\x22\x74\x7b\x73\xf0\x19\xcf\x95\x28\xf0\xfd\xe3\x41\xfd\x0f\x2a\x63\x03\x0b\xa6\x45\x05\x25\xcf\x6d\xe3\x18\x37\x66\x9a\xf6\xf1\xdf\x81\x31\xfa\xf5\x0a\xaa\xb8\xd3\xa7\x40\x55\x89", 64, out, ctx[0]); + bResult = bResult && memcmp(out, "\x90\xdc\x65\x53\x8d\xb0\x00\xea\xa2\x52\xcd\xd4\x1c\x17\x7a\x64\xfe\xff\x95\x36\xe7\x71\x68\x35\xd4\xcf\x5c\x73\x56\xb1\x2f\xcd", 32) == 0; + } for (int i = 0; i < MAX_N; i++) cryptonight_free_ctx(ctx[i]); @@ -386,6 +402,9 @@ minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmr case cryptonight_haven: algv = 8; break; + case cryptonight_bittube2: + algv = 9; + break; default: algv = 2; break; @@ -427,7 +446,11 @@ minethd::cn_hash_fun minethd::func_selector(bool 
bHaveAes, bool bNoPrefetch, xmr cryptonight_hash<cryptonight_haven, false, false>, cryptonight_hash<cryptonight_haven, true, false>, cryptonight_hash<cryptonight_haven, false, true>, - cryptonight_hash<cryptonight_haven, true, true> + cryptonight_hash<cryptonight_haven, true, true>, + cryptonight_hash<cryptonight_bittube2, false, false>, + cryptonight_hash<cryptonight_bittube2, true, false>, + cryptonight_hash<cryptonight_bittube2, false, true>, + cryptonight_hash<cryptonight_bittube2, true, true> }; std::bitset<2> digit; @@ -579,6 +602,9 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, case cryptonight_haven: algv = 8; break; + case cryptonight_bittube2: + algv = 9; + break; default: algv = 2; break; @@ -736,8 +762,24 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash<cryptonight_haven, false, false>, cryptonight_penta_hash<cryptonight_haven, true, false>, cryptonight_penta_hash<cryptonight_haven, false, true>, - cryptonight_penta_hash<cryptonight_haven, true, true> - + cryptonight_penta_hash<cryptonight_haven, true, true>, + + cryptonight_double_hash<cryptonight_bittube2, false, false>, + cryptonight_double_hash<cryptonight_bittube2, true, false>, + cryptonight_double_hash<cryptonight_bittube2, false, true>, + cryptonight_double_hash<cryptonight_bittube2, true, true>, + cryptonight_triple_hash<cryptonight_bittube2, false, false>, + cryptonight_triple_hash<cryptonight_bittube2, true, false>, + cryptonight_triple_hash<cryptonight_bittube2, false, true>, + cryptonight_triple_hash<cryptonight_bittube2, true, true>, + cryptonight_quad_hash<cryptonight_bittube2, false, false>, + cryptonight_quad_hash<cryptonight_bittube2, true, false>, + cryptonight_quad_hash<cryptonight_bittube2, false, true>, + cryptonight_quad_hash<cryptonight_bittube2, true, true>, + cryptonight_penta_hash<cryptonight_bittube2, false, false>, + cryptonight_penta_hash<cryptonight_bittube2, true, false>, + 
cryptonight_penta_hash<cryptonight_bittube2, false, true>, + cryptonight_penta_hash<cryptonight_bittube2, true, true> }; std::bitset<2> digit; diff --git a/xmrstak/backend/cryptonight.hpp b/xmrstak/backend/cryptonight.hpp index 595375610518dde61dc9191c29847c5b218c5c4d..b6f656138024e78e6877a8d86a839f39e31f9aaa 100644 --- a/xmrstak/backend/cryptonight.hpp +++ b/xmrstak/backend/cryptonight.hpp @@ -14,7 +14,8 @@ enum xmrstak_algo cryptonight_ipbc = 6, // equal to cryptonight_aeon with a small tweak in the miner code cryptonight_stellite = 7, //equal to cryptonight_monero but with one tiny change cryptonight_masari = 8, //equal to cryptonight_monero but with less iterations, used by masari - cryptonight_haven = 9 // // equal to cryptonight_heavy with a small tweak + cryptonight_haven = 9, // equal to cryptonight_heavy with a small tweak + cryptonight_bittube2 = 10, // derived from cryptonight_heavy with own aes-round implementation and minor other tweaks }; // define aeon settings @@ -62,6 +63,9 @@ inline constexpr size_t cn_select_memory<cryptonight_masari>() { return CRYPTONI template<> inline constexpr size_t cn_select_memory<cryptonight_haven>() { return CRYPTONIGHT_HEAVY_MEMORY; } +template<> +inline constexpr size_t cn_select_memory<cryptonight_bittube2>() { return CRYPTONIGHT_HEAVY_MEMORY; } + inline size_t cn_select_memory(xmrstak_algo algo) { switch(algo) @@ -75,8 +79,9 @@ inline size_t cn_select_memory(xmrstak_algo algo) case cryptonight_aeon: case cryptonight_lite: return CRYPTONIGHT_LITE_MEMORY; - case cryptonight_heavy: + case cryptonight_bittube2: case cryptonight_haven: + case cryptonight_heavy: return CRYPTONIGHT_HEAVY_MEMORY; default: return 0; @@ -113,6 +118,8 @@ inline constexpr uint32_t cn_select_mask<cryptonight_masari>() { return CRYPTONI template<> inline constexpr uint32_t cn_select_mask<cryptonight_haven>() { return CRYPTONIGHT_HEAVY_MASK; } +template<> +inline constexpr uint32_t cn_select_mask<cryptonight_bittube2>() { return 
CRYPTONIGHT_HEAVY_MASK; } inline size_t cn_select_mask(xmrstak_algo algo) { @@ -127,8 +134,9 @@ inline size_t cn_select_mask(xmrstak_algo algo) case cryptonight_aeon: case cryptonight_lite: return CRYPTONIGHT_LITE_MASK; - case cryptonight_heavy: + case cryptonight_bittube2: case cryptonight_haven: + case cryptonight_heavy: return CRYPTONIGHT_HEAVY_MASK; default: return 0; @@ -165,6 +173,8 @@ inline constexpr uint32_t cn_select_iter<cryptonight_masari>() { return CRYPTONI template<> inline constexpr uint32_t cn_select_iter<cryptonight_haven>() { return CRYPTONIGHT_HEAVY_ITER; } +template<> +inline constexpr uint32_t cn_select_iter<cryptonight_bittube2>() { return CRYPTONIGHT_HEAVY_ITER; } inline size_t cn_select_iter(xmrstak_algo algo) { @@ -178,8 +188,9 @@ inline size_t cn_select_iter(xmrstak_algo algo) case cryptonight_aeon: case cryptonight_lite: return CRYPTONIGHT_LITE_ITER; - case cryptonight_heavy: + case cryptonight_bittube2: case cryptonight_haven: + case cryptonight_heavy: return CRYPTONIGHT_HEAVY_ITER; case cryptonight_masari: return CRYPTONIGHT_MASARI_ITER; diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu index 48243e3b50b1f579c0ede3f753a7a4d856a4eb27..39f51ea5ed49de5431796ded4cf629b6bdc50fed 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu @@ -231,7 +231,7 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti uint32_t t1[2], t2[2], res; uint32_t tweak1_2[2]; - if (ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) + if (ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) { uint32_t * state = d_ctx_state + thread * 50; tweak1_2[0] = (d_input[8] >> 24) | (d_input[9] << 8); @@ -242,7 +242,7 @@ 
__global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti a = (d_ctx_a + thread * 4)[sub]; idx0 = shuffle<4>(sPtr,sub, a, 0); - if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2) { if(partidx != 0) { @@ -260,25 +260,56 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti { j = ( ( idx0 & MASK ) >> 2 ) + sub; - const uint32_t x_0 = loadGlobal32<uint32_t>( long_state + j ); - const uint32_t x_1 = shuffle<4>(sPtr,sub, x_0, sub + 1); - const uint32_t x_2 = shuffle<4>(sPtr,sub, x_0, sub + 2); - const uint32_t x_3 = shuffle<4>(sPtr,sub, x_0, sub + 3); - d[x] = a ^ - t_fn0( x_0 & 0xff ) ^ - t_fn1( (x_1 >> 8) & 0xff ) ^ - t_fn2( (x_2 >> 16) & 0xff ) ^ - t_fn3( ( x_3 >> 24 ) ); + if(ALGO == cryptonight_bittube2) + { + uint32_t k[4]; + k[0] = ~loadGlobal32<uint32_t>( long_state + j ); + k[1] = shuffle<4>(sPtr,sub, k[0], sub + 1); + k[2] = shuffle<4>(sPtr,sub, k[0], sub + 2); + k[3] = shuffle<4>(sPtr,sub, k[0], sub + 3); + + uint32_t r; + #pragma unroll 4 + for(int i = 0; i < 4; ++i) + { + // only calculate the key if all data are up to date + if(i == sub) + { + r = a ^ + t_fn0( k[0] & 0xff ) ^ + t_fn1( (k[1] >> 8) & 0xff ) ^ + t_fn2( (k[2] >> 16) & 0xff ) ^ + t_fn3( (k[3] >> 24 ) ); + } + /* avoid negative number for modulo + * load valid key (k) depending on the round + */ + k[(4 - sub + i)%4] = shuffle<4>(sPtr,sub, k[0] ^ r, i); + } + d[x] = r; + } + else + { + const uint32_t x_0 = loadGlobal32<uint32_t>( long_state + j ); + const uint32_t x_1 = shuffle<4>(sPtr,sub, x_0, sub + 1); + const uint32_t x_2 = shuffle<4>(sPtr,sub, x_0, sub + 2); + const uint32_t x_3 = shuffle<4>(sPtr,sub, x_0, sub + 3); + d[x] = a ^ + t_fn0( x_0 & 0xff ) ^ + t_fn1( (x_1 >> 8) & 0xff ) ^ + t_fn2( (x_2 >> 16) & 0xff ) ^ + t_fn3( ( x_3 >> 24 ) ); + } //XOR_BLOCKS_DST(c, b, &long_state[j]); t1[0] = shuffle<4>(sPtr,sub, d[x], 0); const uint32_t z = 
d[0] ^ d[1]; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) { const uint32_t table = 0x75310U; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_masari) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) { const uint32_t index = ((z >> 26) & 12) | ((z >> 23) & 2); const uint32_t fork_7 = z ^ ((table >> index) & 0x30U) << 24; @@ -312,12 +343,12 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti res = *( (uint64_t *) t2 ) >> ( sub & 1 ? 32 : 0 ); - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) { const uint32_t tweaked_res = tweak1_2[sub & 1] ^ res; uint32_t long_state_update = sub2 ? tweaked_res : res; - if (ALGO == cryptonight_ipbc) + if (ALGO == cryptonight_ipbc || ALGO == cryptonight_bittube2) { uint32_t value = shuffle<4>(sPtr,sub, long_state_update, sub & 1) ^ long_state_update; long_state_update = sub >= 2 ? value : long_state_update; @@ -330,7 +361,7 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti a = ( sub & 1 ? 
yy[1] : yy[0] ) ^ res; idx0 = shuffle<4>(sPtr,sub, a, 0); - if(ALGO == cryptonight_heavy) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_bittube2) { int64_t n = loadGlobal64<uint64_t>( ( (uint64_t *) long_state ) + (( idx0 & MASK ) >> 3)); int32_t d = loadGlobal32<uint32_t>( (uint32_t*)(( (uint64_t *) long_state ) + (( idx0 & MASK) >> 3) + 1u )); @@ -341,7 +372,7 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti idx0 = d ^ q; } - else if(ALGO == cryptonight_haven) + else if(ALGO == cryptonight_haven) { int64_t n = loadGlobal64<uint64_t>( ( (uint64_t *) long_state ) + (( idx0 & MASK ) >> 3)); int32_t d = loadGlobal32<uint32_t>( (uint32_t*)(( (uint64_t *) long_state ) + (( idx0 & MASK) >> 3) + 1u )); @@ -359,7 +390,7 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti { (d_ctx_a + thread * 4)[sub] = a; (d_ctx_b + thread * 4)[sub] = d[1]; - if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2) if(sub&1) *(d_ctx_b + threads * 4 + thread) = idx0; } @@ -405,7 +436,7 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti cn_aes_pseudo_round_mut( sharedMemory, text, key ); - if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2) { #pragma unroll for ( int j = 0; j < 4; ++j ) @@ -442,7 +473,7 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce) CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase1<ITERATIONS,MEMORY><<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads, bfactorOneThree, i, ctx->d_long_state, - (ALGO == cryptonight_heavy || ALGO == cryptonight_haven ? ctx->d_ctx_state2 : ctx->d_ctx_state), + (ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2 ? 
ctx->d_ctx_state2 : ctx->d_ctx_state), ctx->d_ctx_key1 )); if ( partcount > 1 && ctx->device_bsleep > 0) compat_usleep( ctx->device_bsleep ); @@ -476,7 +507,7 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce) int roundsPhase3 = partcountOneThree; - if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2) { // cryptonight_heavy used two full rounds over the scratchpad memory roundsPhase3 *= 2; @@ -534,5 +565,9 @@ void cryptonight_core_cpu_hash(nvid_ctx* ctx, xmrstak_algo miner_algo, uint32_t { cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_haven>(ctx, startNonce); } + else if(miner_algo == cryptonight_bittube2) + { + cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_bittube2>(ctx, startNonce); + } } diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index 2cb3702fa5c8c1e9a459d5ad0f73eca543de6dcd..3b049ace83e7ce705357764d5ac63dd8f97c2eea 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -114,7 +114,7 @@ __global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restric int thread = ( blockDim.x * blockIdx.x + threadIdx.x ); __shared__ uint32_t sharedMemory[1024]; - if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2) { cn_aes_gpu_init( sharedMemory ); __syncthreads( ); @@ -148,7 +148,7 @@ __global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restric memcpy( d_ctx_key2 + thread * 40, ctx_key2, 40 * 4 ); memcpy( d_ctx_state + thread * 50, ctx_state, 50 * 4 ); - if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven || 
ALGO == cryptonight_bittube2) { for(int i=0; i < 16; i++) @@ -172,7 +172,7 @@ __global__ void cryptonight_extra_gpu_final( int threads, uint64_t target, uint3 __shared__ uint32_t sharedMemory[1024]; - if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2) { cn_aes_gpu_init( sharedMemory ); __syncthreads( ); @@ -189,7 +189,7 @@ __global__ void cryptonight_extra_gpu_final( int threads, uint64_t target, uint3 for ( i = 0; i < 50; i++ ) state[i] = ctx_state[i]; - if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2) { uint32_t key[40]; @@ -287,7 +287,11 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) size_t wsize = ctx->device_blocks * ctx->device_threads; CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state, 50 * sizeof(uint32_t) * wsize)); size_t ctx_b_size = 4 * sizeof(uint32_t) * wsize; - if(cryptonight_heavy == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() || cryptonight_haven == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()) + if( + cryptonight_heavy == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() || + cryptonight_haven == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() || + cryptonight_bittube2 == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() + ) { // extent ctx_b to hold the state of idx0 ctx_b_size += sizeof(uint32_t) * wsize; @@ -331,6 +335,11 @@ extern "C" void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<cryptonight_haven><<<grid, block >>>( wsize, ctx->d_input, ctx->inputlen, startNonce, ctx->d_ctx_state,ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2 )); } + else if(miner_algo == 
cryptonight_bittube2) + { + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<cryptonight_bittube2><<<grid, block >>>( wsize, ctx->d_input, ctx->inputlen, startNonce, + ctx->d_ctx_state,ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2 )); + } else { /* pass two times d_ctx_state because the second state is used later in phase1, @@ -368,6 +377,14 @@ extern "C" void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce, cryptonight_extra_gpu_final<cryptonight_haven><<<grid, block >>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state,ctx->d_ctx_key2 ) ); } + else if(miner_algo == cryptonight_bittube2) + { + CUDA_CHECK_MSG_KERNEL( + ctx->device_id, + "\n**suggestion: Try to increase the value of the attribute 'bfactor' in the NVIDIA config file.**", + cryptonight_extra_gpu_final<cryptonight_bittube2><<<grid, block >>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state,ctx->d_ctx_key2 ) + ); + } else { // fallback for all other algorithms @@ -625,7 +642,11 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) // up to 16kibyte extra memory is used per thread for some kernel (lmem/local memory) // 680bytes are extra meta data memory per hash size_t perThread = hashMemSize + 16192u + 680u; - if(cryptonight_heavy == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() || cryptonight_haven == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()) + if( + cryptonight_heavy == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() || + cryptonight_haven == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() || + cryptonight_bittube2 == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() + ) perThread += 50 * 4; // state double buffer size_t max_intensity = limitedMemory / perThread; diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp index 
ba1e1ace4b68cb5cf84157948dfa68085886fdd8..354388849c0ee916a77e2cea99fcef220691be1a 100644 --- a/xmrstak/jconf.cpp +++ b/xmrstak/jconf.cpp @@ -89,7 +89,9 @@ xmrstak::coin_selection coins[] = { // name, userpool, devpool, default_pool_suggestion { "aeon7", {cryptonight_aeon, cryptonight_lite, 7u}, {cryptonight_aeon, cryptonight_lite, 7u}, "mine.aeon-pool.com:5555" }, { "bbscoin", {cryptonight_monero, cryptonight, 3u}, {cryptonight_monero, cryptonight_monero, 0u}, nullptr }, + { "bittube", {cryptonight_bittube2, cryptonight_bittube2, 0}, {cryptonight_heavy, cryptonight_heavy, 0u},"mining.bit.tube:13333"}, { "cryptonight", {cryptonight_monero, cryptonight, 255u}, {cryptonight_monero, cryptonight_monero, 0u}, nullptr }, + { "cryptonight_bittube2",{cryptonight_bittube2, cryptonight_bittube2, 0}, {cryptonight_heavy, cryptonight_heavy, 0u},nullptr}, { "cryptonight_masari", {cryptonight_monero, cryptonight_masari, 255u}, {cryptonight_monero, cryptonight_monero, 0u},nullptr }, { "cryptonight_haven", {cryptonight_heavy, cryptonight_haven, 255u}, {cryptonight_heavy, cryptonight_heavy, 0u}, nullptr }, { "cryptonight_heavy", {cryptonight_heavy, cryptonight_heavy, 0u}, {cryptonight_heavy, cryptonight_heavy, 0u}, nullptr }, @@ -101,7 +103,6 @@ xmrstak::coin_selection coins[] = { { "graft", {cryptonight_monero, cryptonight, 8u}, {cryptonight_monero, cryptonight_monero, 0u}, nullptr }, { "haven", {cryptonight_haven, cryptonight_heavy, 3u}, {cryptonight_heavy, cryptonight_heavy, 0u}, nullptr }, { "intense", {cryptonight_monero, cryptonight, 4u}, {cryptonight_monero, cryptonight_monero, 0u}, nullptr }, - { "ipbc", {cryptonight_aeon, cryptonight_ipbc, 255u}, {cryptonight_aeon, cryptonight_aeon, 255u}, nullptr }, { "masari", {cryptonight_masari, cryptonight_monero, 7u}, {cryptonight_monero, cryptonight_monero, 0u},nullptr }, { "monero7", {cryptonight_monero, cryptonight_monero, 0u}, {cryptonight_monero, cryptonight_monero, 0u}, "pool.usxmrpool.com:3333" }, { "ryo", {cryptonight_heavy, 
cryptonight_heavy, 0u}, {cryptonight_heavy, cryptonight_heavy, 0u}, nullptr }, diff --git a/xmrstak/pools.tpl b/xmrstak/pools.tpl index 85ac47f7b8bf04693e9c163d62e6214b0364bba4..6960d63bb1e5b2a2085f0d7a1be130d137922162 100644 --- a/xmrstak/pools.tpl +++ b/xmrstak/pools.tpl @@ -22,10 +22,10 @@ POOLCONF], * * aeon7 (use this for Aeon's new PoW) * bbscoin (automatic switch with block version 3 to cryptonight_v7) + * bittube (uses cryptonight_bittube2 algorithm) * graft * haven (automatic switch with block version 3 to cryptonight_haven) * intense - * ipbc * masari * monero7 (use this for Monero's new PoW) * ryo @@ -41,6 +41,7 @@ POOLCONF], * cryptonight * cryptonight_v7 * # 4MiB scratchpad memory + * cryptonight_bittube2 * cryptonight_haven * cryptonight_heavy */