Skip to content
Snippets Groups Projects
Commit 2818a448 authored by psychocrypt's avatar psychocrypt
Browse files

NVIDIA: sqrt optimization cryptonight_v8

Avoid branch divergence
parent fd27561b
No related branches found
No related tags found
No related merge requests found
......@@ -99,8 +99,9 @@ __device__ __forceinline__ uint32_t fast_sqrt_v2(const uint64_t n1)
const uint32_t b = result & 1;
const uint64_t x2 = (uint64_t)(s) * (s + b) + ((uint64_t)(result) << 32) - n1;
if ((int64_t)(x2 + b) > 0) --result;
if ((int64_t)(x2 + 0x100000000UL + s) < 0) ++result;
const int32_t overshoot = ((int64_t)(x2 + b) > 0) ? -1 : 0;
const int32_t undershoot = ((int64_t)(x2 + 0x100000000UL + s) < 0) ? 1 : 0;
result += (overshoot+undershoot);
return result;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment