Skip to content
Snippets Groups Projects
Commit 21ce0385 authored by psychocrypt's avatar psychocrypt
Browse files

add cpu family and model detection


Helper functions to select the asm version based on the number of hashes used per thread and the family name of the cpu.

- use the new cpu type functions to fix the wrong AMD family detection in `autoAdjust.hpp`
- allow to set the asm version to `auto`
- rename asm option `intel` to `intel_avx`
- rename asm option `ryzen` to `amd_avx`

Co-authored-by: default avatarfireice-uk <fireice-uk@users.noreply.github.com>
parent a6ecf8d4
No related branches found
No related tags found
No related merge requests found
......@@ -7,6 +7,7 @@
#include "xmrstak/misc/configEditor.hpp"
#include "xmrstak/params.hpp"
#include "xmrstak/backend/cryptonight.hpp"
#include "xmrstak/backend/cpu/cpuType.hpp"
#include <string>
#ifdef _WIN32
......@@ -20,14 +21,6 @@ namespace xmrstak
{
namespace cpu
{
// Extract a bit field from a register value.
// The field starts at bit `l` and is `h - l` bits wide, i.e. `h` is the
// first bit that is NOT part of the result (bits [h-1:l] are returned).
// For example get_masked(cpu_info[1], 31, 22) yields EBX[30:22].
inline int32_t get_masked(int32_t val, int32_t h, int32_t l)
{
	const int32_t field_width = h - l;
	// build `field_width` consecutive one-bits and move them to position `l`
	const int32_t field_mask = (0x7FFFFFFF >> (31 - field_width)) << l;
	return (val & field_mask) >> l;
}
class autoAdjust
{
......@@ -82,7 +75,7 @@ public:
conf += std::string(" { \"low_power_mode\" : ");
conf += std::string(double_mode ? "true" : "false");
conf += std::string(", \"no_prefetch\" : true, \"asm\" : \"off\", \"affine_to_cpu\" : ");
conf += std::string(", \"no_prefetch\" : true, \"asm\" : \"auto\", \"affine_to_cpu\" : ");
conf += std::to_string(aff_id);
conf += std::string(" },\n");
......@@ -143,7 +136,8 @@ private:
L3KB_size = get_masked(cpu_info[3], 31, 18) * 512;
::jconf::cpuid(1, 0, cpu_info);
if(get_masked(cpu_info[0], 11, 8) < 0x17) //0x17h is Zen
if(getModel().family < 0x17) //0x17h is Zen
old_amd = true;
return true;
......
......@@ -70,7 +70,7 @@ public:
{
conf += std::string(" { \"low_power_mode\" : ");
conf += std::string((id & 0x8000000) != 0 ? "true" : "false");
conf += std::string(", \"no_prefetch\" : true, \"asm\" : \"off\", \"affine_to_cpu\" : ");
conf += std::string(", \"no_prefetch\" : true, \"asm\" : \"auto\", \"affine_to_cpu\" : ");
conf += std::to_string(id & 0x7FFFFFF);
conf += std::string(" },\n");
}
......
......@@ -11,10 +11,11 @@ R"===(// generated by XMRSTAK_VERSION
* no_prefetch - Some systems can gain up to extra 5% here, but sometimes it will have no difference or make
* things slower.
*
* asm - Allow to switch to a assembler version of cryptonight_v8; allowed value [off, intel, ryzen]
* - off: used the default implementation (no assembler version)
* - intel: supports Intel Ivy Bridge (Xeon v2, Core i7/i5/i3 3xxx, Pentium G2xxx, Celeron G1xxx)
* - ryzen: AMD Ryzen (1xxx and 2xxx series)
* asm - Allow to switch to a assembler version of cryptonight_v8; allowed value [auto, off, intel_avx, amd_avx]
* - auto: xmr-stak will automatically detect the asm type (default)
* - off: disable the usage of optimized assembler
* - intel_avx: supports Intel cpus with avx instructions e.g. Xeon v2, Core i7/i5/i3 3xxx, Pentium G2xxx, Celeron G1xxx
* - amd_avx: supports AMD cpus with avx instructions e.g. AMD Ryzen 1xxx and 2xxx series
*
* affine_to_cpu - This can be either false (no affinity), or the CPU core number. Note that on hyperthreading
* systems it is better to assign threads to physical cores. On Windows this usually means selecting
......@@ -27,8 +28,8 @@ R"===(// generated by XMRSTAK_VERSION
* A filled out configuration should look like this:
* "cpu_threads_conf" :
* [
* { "low_power_mode" : false, "no_prefetch" : true, "asm" : "off", "affine_to_cpu" : 0 },
* { "low_power_mode" : false, "no_prefetch" : true, "asm" : "off", "affine_to_cpu" : 1 },
* { "low_power_mode" : false, "no_prefetch" : true, "asm" : "auto", "affine_to_cpu" : 0 },
* { "low_power_mode" : false, "no_prefetch" : true, "asm" : "auto", "affine_to_cpu" : 1 },
* ],
* If you do not wish to mine with your CPU(s) then use:
* "cpu_threads_conf" :
......
#include "xmrstak/backend/cpu/cpuType.hpp"
#include <cstring>
#include <inttypes.h>
#include <cstdio>
#ifdef _WIN32
#define strcasecmp _stricmp
#include <intrin.h>
#else
#include <cpuid.h>
#endif
namespace xmrstak
{
namespace cpu
{
/** Execute the CPUID instruction for the given leaf / sub-leaf
 *
 * @param eax leaf number passed to the instruction
 * @param ecx sub-leaf number passed to the instruction
 * @param val output array with four entries; it is zeroed before the
 *            query and then filled by the compiler specific intrinsic
 */
void cpuid(uint32_t eax, int32_t ecx, int32_t val[4])
{
	// start from a well defined state before the intrinsic writes the result
	std::memset(val, 0, 4 * sizeof(int32_t));
#ifndef _WIN32
	__cpuid_count(eax, ecx, val[0], val[1], val[2], val[3]);
#else
	__cpuidex(val, eax, ecx);
#endif
}
/** Extract a bit field from a register value
 *
 * The field starts at bit `l` and is `h - l` bits wide, therefore `h` is the
 * first bit that is not part of the result (bits [h-1:l] are returned).
 *
 * @param val register value to read from
 * @param h upper bound of the field (exclusive)
 * @param l lowest bit of the field (inclusive)
 * @return the selected bits shifted down to bit zero
 */
int32_t get_masked(int32_t val, int32_t h, int32_t l)
{
	const int32_t width = h - l;
	// `width` consecutive one-bits moved to bit position `l`
	const int32_t mask = (0x7FFFFFFF >> (31 - width)) << l;
	const int32_t selected = val & mask;
	return selected >> l;
}
/** Check if a single bit is set in a register value
 *
 * @param val register value to inspect
 * @param bit index of the bit to test (0 is the least significant bit)
 * @return true if the bit is set, else false
 */
bool has_feature(int32_t val, int32_t bit)
{
	// move the requested bit to position zero and test it there
	const uint32_t shifted = static_cast<uint32_t>(val) >> bit;
	return (shifted & 1u) == 1u;
}
/** Query the processor via cpuid and collect vendor, family, model and feature flags
 *
 * @return Model holding the vendor string, the family and model number and
 *         the sse2, aes and avx feature flags
 */
Model getModel()
{
	int32_t regs[4];

	// leaf 0: the 12 character vendor string is stored in regs[1], regs[3], regs[2]
	cpuid(0, 0, regs);
	char vendor[13] = {0};
	std::memcpy(vendor, &regs[1], 4);
	std::memcpy(vendor + 4, &regs[3], 4);
	std::memcpy(vendor + 8, &regs[2], 4);

	// leaf 1: regs[0] carries family/model, regs[2] and regs[3] the feature bits
	cpuid(1, 0, regs);
	const int32_t signature = regs[0];

	Model info;
	info.type_name = vendor;
	info.family = get_masked(signature, 12, 8);

	// the upper model bits [19:16] are placed above the base model bits [7:4]
	const int32_t base_model = get_masked(signature, 8, 4);
	const int32_t upper_model = get_masked(signature, 20, 16);
	info.model = base_model | (upper_model << 4);

	// feature bits https://en.wikipedia.org/wiki/CPUID
	info.sse2 = has_feature(regs[3], 26);
	info.aes = has_feature(regs[2], 25); // aes-ni
	info.avx = has_feature(regs[2], 28);

	// for AMD a base family of 0xF means bits [27:20] must be added to the family
	const bool is_amd = std::strcmp(vendor, "AuthenticAMD") == 0;
	if(is_amd && info.family == 0xF)
		info.family += get_masked(signature, 28, 20);

	return info;
}
} // namespace cpu
} // namespace xmrstak
#pragma once
#include <string>
#include <cstdint>
namespace xmrstak
{
namespace cpu
{
/** Description of the cpu as filled in by getModel() */
struct Model
{
	//! cpu family number
	uint32_t family{0u};
	//! cpu model number
	uint32_t model{0u};
	//! true if the aes-ni feature bit is set
	bool aes{false};
	//! true if the sse2 feature bit is set
	bool sse2{false};
	//! true if the avx feature bit is set
	bool avx{false};
	//! vendor string reported by the cpu, "unknown" until it is set
	std::string type_name{"unknown"};
};
/** Detect the cpu the process is running on
 *
 * @return description of the cpu, see Model
 */
Model getModel();
/** Mask bits between h and l and return the value
 *
 * This enables us to put in values exactly like in the manual
 * For example EBX[30:22] is get_masked(cpu_info[1], 31, 22)
 *
 * @param val register value to read from
 * @param h upper bound of the bit field, exclusive: bit h itself is not part of the result
 * @param l lowest bit of the bit field, inclusive
 * @return bits [h-1:l] of val shifted down to bit zero
 */
int32_t get_masked(int32_t val, int32_t h, int32_t l);
} // namespace cpu
} // namespace xmrstak
......@@ -27,6 +27,7 @@
#include "xmrstak/backend/iBackend.hpp"
#include "xmrstak/backend/globalStates.hpp"
#include "xmrstak/misc/configEditor.hpp"
#include "xmrstak/backend/cpu/cpuType.hpp"
#include "xmrstak/params.hpp"
#include "jconf.hpp"
......@@ -449,35 +450,33 @@ std::vector<iBackend*> minethd::thread_starter(uint32_t threadOffset, miner_work
return pvThreads;
}
/** get the supported asm name
 *
 * @param num_hashes number of hashes calculated per thread
 * @return asm type based on the number of hashes per thread and the
 *         internally evaluated cpu type; "off" if no asm version fits
 */
static std::string getAsmName(const uint32_t num_hashes)
{
	std::string asm_type = "off";
	if(num_hashes == 0)
		return asm_type;
	const auto cpu_model = getModel();
	// both asm versions require avx and aes support
	if(cpu_model.avx && cpu_model.aes)
	{
		if(cpu_model.type_name.find("Intel") != std::string::npos)
			asm_type = "intel_avx";
		// the amd version supports only one hash per thread
		else if(cpu_model.type_name.find("AMD") != std::string::npos && num_hashes == 1)
			asm_type = "amd_avx";
	}
	// the original fell off the end of a non-void function here (undefined behavior)
	return asm_type;
}
template<size_t N>
minethd::cn_hash_fun minethd::func_multi_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo, const std::string& asm_version_str)
{
static_assert(N >= 1, "number of threads must be >= 1" );
// check for asm optimized version for cryptonight_v8
if(N <= 2 && algo == cryptonight_monero_v8 && bHaveAes)
{
if(asm_version_str != "off")
{
if(asm_version_str != "intel" && asm_version_str != "ryzen")
printer::inst()->print_msg(L1, "Assembler %s unknown, fallback to non asm version of cryptonight_v8", asm_version_str.c_str());
if(asm_version_str == "intel")
{
// Intel Ivy Bridge (Xeon v2, Core i7/i5/i3 3xxx, Pentium G2xxx, Celeron G1xxx)
if(N == 1)
return Cryptonight_hash_asm<1u, 0u>::template hash<cryptonight_monero_v8>;
else if(N == 2)
return Cryptonight_hash_asm<2u, 0u>::template hash<cryptonight_monero_v8>;
}
// supports only 1 thread per hash
if(N == 1 && asm_version_str == "ryzen")
{
// AMD Ryzen (1xxx and 2xxx series)
return Cryptonight_hash_asm<1u, 1u>::template hash<cryptonight_monero_v8>;
}
}
}
// We have two independent flag bits in the functions
// therefore we will build a binary digit and select the
// function as a two digit binary
......@@ -584,7 +583,39 @@ minethd::cn_hash_fun minethd::func_multi_selector(bool bHaveAes, bool bNoPrefetc
digit.set(0, !bHaveAes);
digit.set(1, !bNoPrefetch);
return func_table[ algv << 2 | digit.to_ulong() ];
auto selected_function = func_table[ algv << 2 | digit.to_ulong() ];
// check for asm optimized version for cryptonight_v8
if(N <= 2 && algo == cryptonight_monero_v8 && bHaveAes)
{
	// "auto" is resolved to a concrete asm type based on the detected cpu
	std::string selected_asm = asm_version_str;
	if(selected_asm == "auto")
		selected_asm = cpu::getAsmName(N);
	if(selected_asm != "off")
	{
		const bool is_intel_avx = selected_asm == "intel_avx";
		const bool is_amd_avx = selected_asm == "amd_avx";
		const bool is_known_asm = is_intel_avx || is_amd_avx;
		if(is_intel_avx)
		{
			// Intel Ivy Bridge (Xeon v2, Core i7/i5/i3 3xxx, Pentium G2xxx, Celeron G1xxx)
			if(N == 1)
				selected_function = Cryptonight_hash_asm<1u, 0u>::template hash<cryptonight_monero_v8>;
			else if(N == 2)
				selected_function = Cryptonight_hash_asm<2u, 0u>::template hash<cryptonight_monero_v8>;
		}
		// supports only 1 thread per hash
		if(N == 1 && is_amd_avx)
		{
			// AMD Ryzen (1xxx and 2xxx series)
			selected_function = Cryptonight_hash_asm<1u, 1u>::template hash<cryptonight_monero_v8>;
		}
		// The original tested `!= "intel_avx" || != "amd_avx"`, which is always
		// true, so an explicitly configured valid asm type was reported as unknown.
		if(asm_version_str == "auto" && is_known_asm)
			printer::inst()->print_msg(L3, "Switch to assembler version for '%s' cpu's", selected_asm.c_str());
		else if(!is_known_asm) // unknown asm type
			printer::inst()->print_msg(L1, "Assembler '%s' unknown, fallback to non asm version of cryptonight_v8", selected_asm.c_str());
	}
}
return selected_function;
}
minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment