Skip to content
Snippets Groups Projects
Unverified Commit 28f27d31 authored by fireice-uk's avatar fireice-uk Committed by GitHub
Browse files

Merge pull request #2064 from fireice-uk/dev

release 2.6.0
parents 752fd1e7 76456d13
No related branches found
No related tags found
No related merge requests found
...@@ -85,3 +85,44 @@ After the configuration you need to compile the miner, follow the guide for your ...@@ -85,3 +85,44 @@ After the configuration you need to compile the miner, follow the guide for your
- `XMR-STAK_THREADS` give the compiler information which value for `threads` is used at runtime - `XMR-STAK_THREADS` give the compiler information which value for `threads` is used at runtime
- default is `0` (compile time optimization) - default is `0` (compile time optimization)
- if the miner is compiled and used at runtime with the some value it can increase the hash rate: `cmake .. -DXMR-STAK_THREADS=32` - if the miner is compiled and used at runtime with the some value it can increase the hash rate: `cmake .. -DXMR-STAK_THREADS=32`
#### CUDA Runtime versus CUDA SDK
nVidia packages the CUDA **runtime** with the GPU drivers, and the CUDA **SDK** should match.
While it is possible to compile with old SDK and then run on newer runtime/driver, in most cases it does not work well.
SDK usually bundles a driver that supports the particular CUDA version, but it is always best to get (usually newer)
drivers from the official site.
For Example: Built with 8.0 SDK running on a 9.2 driver crashes randomly on some GPUs, however worked fine on most 9.1
drivers. Backward compatibility "should" work, but in reality there are many cases where it does not (YMMV)
**NOTE**: The inverse case, installing CUDA 10.0 SDK on a system with older driver
does not magically add CUDA 10.0 support to the old driver. You must build with
CUDA SDK to match that driver runtime (check driver release notes PDF under 'supported technologies' list within the
first several pages) - *OR* - upgrade the driver to minimum `411.63` to have the CUDA 10.0 runtime
(unless, Fermi... they can't use CUDA 9.x or 10.0, even though newer drivers still run their *graphics* parts)
Other gotchas based on GPU family:
* Anything less than Fermi will never work
* Fermi (arch 2x) was removed after CUDA 8.0
* Volta (arch 7x) was added in CUDA 9.0
* Turing (arch 75) was added in CUDA 10.0
Here is a rough table of driver revisions and CUDA runtime contained:
| CUDA | Driver min | Driver max | notes
| ----:| ----------:| ----------:| -----
| 10.0 | 411.63 | (current) |
| 9.2 | 397.93 | 399.24 |
| 9.1 | 388.71 | 397.64 |
| 9.0 | 387.92 | 388.59 | Fermi removed (must use CUDA == 8.0)
| 8.0 | 372.70 | 386.28 | except 372.95 has CUDA7.5
| 7.5 | | | *Don't bother, won't compile anymore*
nVidia generally uses the same version numbering on all OS, the above was however based
on Windows Driver Release Notes
nVidia always puts the runtime-included CUDA version in the release notes PDF for whatever driver, doesn't hurt to
double check your specific one.
For better navigation of CUDA version matching, xmr-stak will display both version numbers during CUDA detection phases
such as `[9.2/10.0]` which is the compiled (SDK) version and the current (driver) runtime version.
\ No newline at end of file
...@@ -903,6 +903,9 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) ...@@ -903,6 +903,9 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
const char *fastIntMathV2CL = const char *fastIntMathV2CL =
#include "./opencl/fast_int_math_v2.cl" #include "./opencl/fast_int_math_v2.cl"
; ;
const char *fastDivHeavyCL =
#include "./opencl/fast_div_heavy.cl"
;
const char *cryptonightCL = const char *cryptonightCL =
#include "./opencl/cryptonight.cl" #include "./opencl/cryptonight.cl"
; ;
...@@ -924,6 +927,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) ...@@ -924,6 +927,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
std::string source_code(cryptonightCL); std::string source_code(cryptonightCL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_FAST_INT_MATH_V2"), fastIntMathV2CL); source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_FAST_INT_MATH_V2"), fastIntMathV2CL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_FAST_DIV_HEAVY"), fastDivHeavyCL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_WOLF_AES"), wolfAesCL); source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_WOLF_AES"), wolfAesCL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_WOLF_SKEIN"), wolfSkeinCL); source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_WOLF_SKEIN"), wolfSkeinCL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_JH"), jhCL); source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_JH"), jhCL);
......
...@@ -80,6 +80,8 @@ inline int amd_bfe(const uint src0, const uint offset, const uint width) ...@@ -80,6 +80,8 @@ inline int amd_bfe(const uint src0, const uint offset, const uint width)
//#include "opencl/fast_int_math_v2.cl" //#include "opencl/fast_int_math_v2.cl"
XMRSTAK_INCLUDE_FAST_INT_MATH_V2 XMRSTAK_INCLUDE_FAST_INT_MATH_V2
//#include "fast_div_heavy.cl"
XMRSTAK_INCLUDE_FAST_DIV_HEAVY
//#include "opencl/wolf-aes.cl" //#include "opencl/wolf-aes.cl"
XMRSTAK_INCLUDE_WOLF_AES XMRSTAK_INCLUDE_WOLF_AES
//#include "opencl/wolf-skein.cl" //#include "opencl/wolf-skein.cl"
...@@ -802,14 +804,14 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states ...@@ -802,14 +804,14 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
#if (ALGO == 4 || ALGO == 10) #if (ALGO == 4 || ALGO == 10)
long n = *((__global long*)(Scratchpad + (IDX((idx0) >> 4)))); long n = *((__global long*)(Scratchpad + (IDX((idx0) >> 4))));
int d = ((__global int*)(Scratchpad + (IDX((idx0) >> 4))))[2]; int d = ((__global int*)(Scratchpad + (IDX((idx0) >> 4))))[2];
long q = n / (d | 0x5); long q = fast_div_heavy(n, d | 0x5);
*((__global long*)(Scratchpad + (IDX((idx0) >> 4)))) = n ^ q; *((__global long*)(Scratchpad + (IDX((idx0) >> 4)))) = n ^ q;
idx0 = (d ^ q) & MASK; idx0 = (d ^ q) & MASK;
// cryptonight_haven // cryptonight_haven
#elif (ALGO == 9) #elif (ALGO == 9)
long n = *((__global long*)(Scratchpad + (IDX((idx0) >> 4)))); long n = *((__global long*)(Scratchpad + (IDX((idx0) >> 4))));
int d = ((__global int*)(Scratchpad + (IDX((idx0) >> 4))))[2]; int d = ((__global int*)(Scratchpad + (IDX((idx0) >> 4))))[2];
long q = n / (d | 0x5); long q = fast_div_heavy(n, d | 0x5);
*((__global long*)(Scratchpad + (IDX((idx0) >> 4)))) = n ^ q; *((__global long*)(Scratchpad + (IDX((idx0) >> 4)))) = n ^ q;
idx0 = ((~d) ^ q) & MASK; idx0 = ((~d) ^ q) & MASK;
#endif #endif
......
R"===(
#ifndef FAST_DIV_HEAVY_CL
#define FAST_DIV_HEAVY_CL
inline ulong get_reciprocal_heavy(uint a)
{
const uint shift = clz(a);
a <<= shift;
const float a_hi = as_float((a >> 8) + 1 + ((126U + 31U) << 23));
const float a_lo = convert_float_rte(as_int(a & 0xFF) - 256);
const float r = native_recip(a_hi);
const uint tmp0 = as_uint(r);
const uint tmp1 = tmp0 + ((shift + 2 + 64U) << 23);
const float r_scaled = as_float(tmp1);
const float h = fma(a_lo, r, fma(a_hi, r, -1.0f));
const float r_scaled_hi = as_float(tmp1 & ~4095U);
const float h_hi = as_float(as_uint(h) & ~4095U);
const float r_scaled_lo = r_scaled - r_scaled_hi;
const float h_lo = h - h_hi;
const float x1 = h_hi * r_scaled_hi;
const float x2 = h_lo * r_scaled + h_hi * r_scaled_lo;
const long h1 = convert_long_rte(x1);
const int h2 = convert_int_rtp(x2) - convert_int_rtn(h * (x1 + x2));
const ulong result = tmp0 & 0xFFFFFF;
return (result << (shift + 9)) - ((h1 + h2) >> 2);
}
inline long fast_div_heavy(long _a, int _b)
{
const ulong a = abs(_a);
const uint b = abs(_b);
ulong q = mul_hi(a, get_reciprocal_heavy(b));
const long tmp = a - q * b;
const int overshoot = (tmp < 0) ? 1 : 0;
const int undershoot = (tmp >= b) ? 1 : 0;
q += undershoot - overshoot;
return ((as_int2(_a).s1 ^ _b) < 0) ? -q : q;
}
#endif
)==="
\ No newline at end of file
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
#endif #endif
#define XMR_STAK_NAME "xmr-stak" #define XMR_STAK_NAME "xmr-stak"
#define XMR_STAK_VERSION "2.5.2" #define XMR_STAK_VERSION "2.6.0"
#if defined(_WIN32) #if defined(_WIN32)
#define OS_TYPE "win" #define OS_TYPE "win"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment