diff --git a/xmrstak/backend/cpu/crypto/CryptonightR_gen.cpp b/xmrstak/backend/cpu/crypto/CryptonightR_gen.cpp index a289ac559331c0b3948d1a07846797151a2b4a7a..2fc1a8baafaad969619aa525285ba7f45528b240 100644 --- a/xmrstak/backend/cpu/crypto/CryptonightR_gen.cpp +++ b/xmrstak/backend/cpu/crypto/CryptonightR_gen.cpp @@ -74,7 +74,7 @@ static inline void add_random_math(uint8_t* &p, const V4_Instruction* code, int } } -void v4_compile_code(cryptonight_ctx* ctx, int code_size) +void v4_compile_code(size_t N, cryptonight_ctx* ctx, int code_size) { printer::inst()->print_msg(LDEBUG, "CryptonightR update ASM code"); const int allocation_size = 65536; @@ -89,12 +89,24 @@ void v4_compile_code(cryptonight_ctx* ctx, int code_size) if(ctx->fun_data != nullptr) { - add_code(p, CryptonightR_template_part1, CryptonightR_template_part2); - add_random_math(p, ctx->cn_r_ctx.code, code_size, instructions, instructions_mov, false, ctx->asm_version); - add_code(p, CryptonightR_template_part2, CryptonightR_template_part3); - *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0)); - add_code(p, CryptonightR_template_part3, CryptonightR_template_end); - + if(N == 2) + { + add_code(p, CryptonightR_template_double_part1, CryptonightR_template_double_part2); + add_random_math(p, ctx->cn_r_ctx.code, code_size, instructions, instructions_mov, false, ctx->asm_version); + add_code(p, CryptonightR_template_double_part2, CryptonightR_template_double_part3); + add_random_math(p, ctx->cn_r_ctx.code, code_size, instructions, instructions_mov, false, ctx->asm_version); + add_code(p, CryptonightR_template_double_part3, CryptonightR_template_double_part4); + *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_template_double_mainloop) - ((const uint8_t*)CryptonightR_template_double_part1)) - (p - p0)); + add_code(p, CryptonightR_template_double_part4, CryptonightR_template_double_end); + } + else + { + add_code(p, CryptonightR_template_part1, CryptonightR_template_part2); + add_random_math(p, ctx->cn_r_ctx.code, code_size, instructions, instructions_mov, false, ctx->asm_version); + add_code(p, CryptonightR_template_part2, CryptonightR_template_part3); + *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0)); + add_code(p, CryptonightR_template_part3, CryptonightR_template_end); + } ctx->loop_fn = reinterpret_cast<cn_mainloop_fun>(ctx->fun_data); protectExecutableMemory(ctx->fun_data, allocation_size); diff --git a/xmrstak/backend/cpu/crypto/cryptonight.h b/xmrstak/backend/cpu/crypto/cryptonight.h index bd0c4967e0cd1271f1299c2fc2222325cd9ac15d..488805ec05516c07118f3bc4d652d0ca2eac66f1 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight.h +++ b/xmrstak/backend/cpu/crypto/cryptonight.h @@ -16,7 +16,7 @@ typedef void (*cn_mainloop_fun)(cryptonight_ctx *ctx); typedef void (*cn_double_mainloop_fun)(cryptonight_ctx*, cryptonight_ctx*); typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx**, const xmrstak_algo&); -void v4_compile_code(cryptonight_ctx* ctx, int code_size); +void v4_compile_code(size_t N, cryptonight_ctx* ctx, int code_size); struct extra_ctx_r { diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 22fd0f481185bdd853a9f3d6fa7eb62b83325175..2a8705baf70ab337d667b24821b815b9cd020113 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -1143,7 +1143,15 @@ struct Cryptonight_hash_asm<2, 0> cn_explode_scratchpad<false, false, ALGO>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state, algo); } - reinterpret_cast<cn_double_mainloop_fun>(ctx[0]->loop_fn)(ctx[0], ctx[1]); + if(ALGO == cryptonight_r) + { + typedef void ABI_ATTRIBUTE (*cn_r_double_mainloop_fun)(cryptonight_ctx*, cryptonight_ctx*); + reinterpret_cast<cn_r_double_mainloop_fun>(ctx[0]->loop_fn)(ctx[0], ctx[1]); + } + else + { + reinterpret_cast<cn_double_mainloop_fun>(ctx[0]->loop_fn)(ctx[0], ctx[1]); + } for(size_t i = 0; i < N; ++i) { @@ -1327,8 +1335,11 @@ struct Cryptonight_R_generator int code_size = v4_random_math_init<ALGO>(ctx[0]->cn_r_ctx.code, work.iBlockHeight); if(ctx[0]->asm_version != 0) { - v4_compile_code(ctx[0], code_size); - ctx[0]->hash_fn = Cryptonight_hash_asm<N, 1u>::template hash<cryptonight_r>; + v4_compile_code(N, ctx[0], code_size); + if(N == 2) + ctx[0]->hash_fn = Cryptonight_hash_asm<2u, 0u>::template hash<cryptonight_r>; + else + ctx[0]->hash_fn = Cryptonight_hash_asm<N, 1u>::template hash<cryptonight_r>; } for(size_t i=1; i < N; i++) diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 2b8b0e18d7d8df38dd808ac20e3e7145bb54fb35..2e6d7d1be77426505c18a81a2c8e30d4172aca9a 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -738,9 +738,17 @@ void minethd::func_multi_selector(cryptonight_ctx** ctx, minethd::cn_on_new_job& std::string selected_asm = asm_version_str; if(selected_asm == "auto") selected_asm = cpu::getAsmName(N); - printer::inst()->print_msg(L0, "enable cryptonight_r asm '%s' cpu's", selected_asm.c_str()); - for(int h = 0; h < N; ++h) - ctx[h]->asm_version = selected_asm == "intel_avx" ? 1 : 2; // 1 == Intel; 2 == AMD + if(selected_asm == "off") + { + for(int h = 0; h < N; ++h) + ctx[h]->asm_version = 0; + } + else + { + printer::inst()->print_msg(L0, "enable cryptonight_r asm '%s' cpu's", selected_asm.c_str()); + for(int h = 0; h < N; ++h) + ctx[h]->asm_version = selected_asm == "intel_avx" ? 1 : 2; // 1 == Intel; 2 == AMD + } } for(int h = 1; h < N; ++h)