Skip to content
Snippets Groups Projects
Unverified Commit 26830090 authored by fireice-uk's avatar fireice-uk Committed by GitHub
Browse files

Merge pull request #2080 from psychocrypt/topic-reduceSharedMemUsage

OpenCL: reduce local mem footprint
parents a7e30eb5 6f283928
No related branches found
No related tags found
No related merge requests found
...@@ -562,7 +562,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states ...@@ -562,7 +562,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
ulong b[2]; ulong b[2];
uint4 b_x[1]; uint4 b_x[1];
#endif #endif
__local uint AES0[256], AES1[256], AES2[256], AES3[256]; __local uint AES0[256], AES1[256];
// cryptonight_monero_v8 // cryptonight_monero_v8
#if(ALGO==11) #if(ALGO==11)
...@@ -577,8 +577,6 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states ...@@ -577,8 +577,6 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
const uint tmp = AES0_C[i]; const uint tmp = AES0_C[i];
AES0[i] = tmp; AES0[i] = tmp;
AES1[i] = rotate(tmp, 8U); AES1[i] = rotate(tmp, 8U);
AES2[i] = rotate(tmp, 16U);
AES3[i] = rotate(tmp, 24U);
// cryptonight_monero_v8 // cryptonight_monero_v8
#if(ALGO==11) #if(ALGO==11)
RCP[i] = RCP_C[i]; RCP[i] = RCP_C[i];
...@@ -653,9 +651,9 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states ...@@ -653,9 +651,9 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
((uint4 *)c)[0] = SCRATCHPAD_CHUNK(0); ((uint4 *)c)[0] = SCRATCHPAD_CHUNK(0);
// cryptonight_bittube2 // cryptonight_bittube2
#if(ALGO == 10) #if(ALGO == 10)
((uint4 *)c)[0] = AES_Round_bittube2(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]); ((uint4 *)c)[0] = AES_Round2(AES0, AES1, ~((uint4 *)c)[0], ((uint4 *)a)[0]);
#else #else
((uint4 *)c)[0] = AES_Round(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]); ((uint4 *)c)[0] = AES_Round2(AES0, AES1, ((uint4 *)c)[0], ((uint4 *)a)[0]);
#endif #endif
// cryptonight_monero_v8 // cryptonight_monero_v8
......
...@@ -74,42 +74,49 @@ static const __constant uint AES0_C[256] = ...@@ -74,42 +74,49 @@ static const __constant uint AES0_C[256] =
#define BYTE(x, y) (amd_bfe((x), (y) << 3U, 8U)) #define BYTE(x, y) (amd_bfe((x), (y) << 3U, 8U))
inline uint4 AES_Round_bittube2(const __local uint *AES0, const __local uint *AES1, const __local uint *AES2, const __local uint *AES3, uint4 x, uint4 k)
{
x = ~x;
k.s0 ^= AES0[BYTE(x.s0, 0)] ^ AES1[BYTE(x.s1, 1)] ^ AES2[BYTE(x.s2, 2)] ^ AES3[BYTE(x.s3, 3)];
x.s0 ^= k.s0;
k.s1 ^= AES0[BYTE(x.s1, 0)] ^ AES1[BYTE(x.s2, 1)] ^ AES2[BYTE(x.s3, 2)] ^ AES3[BYTE(x.s0, 3)];
x.s1 ^= k.s1;
k.s2 ^= AES0[BYTE(x.s2, 0)] ^ AES1[BYTE(x.s3, 1)] ^ AES2[BYTE(x.s0, 2)] ^ AES3[BYTE(x.s1, 3)];
x.s2 ^= k.s2;
k.s3 ^= AES0[BYTE(x.s3, 0)] ^ AES1[BYTE(x.s0, 1)] ^ AES2[BYTE(x.s1, 2)] ^ AES3[BYTE(x.s2, 3)];
return k;
}
uint4 AES_Round(const __local uint *AES0, const __local uint *AES1, const __local uint *AES2, const __local uint *AES3, const uint4 X, uint4 key) uint4 AES_Round(const __local uint *AES0, const __local uint *AES1, const __local uint *AES2, const __local uint *AES3, const uint4 X, uint4 key)
{ {
key.s0 ^= AES0[BYTE(X.s0, 0)]; key.s0 ^= AES0[BYTE(X.s0, 0)];
key.s1 ^= AES0[BYTE(X.s1, 0)]; key.s1 ^= AES0[BYTE(X.s1, 0)];
key.s2 ^= AES0[BYTE(X.s2, 0)]; key.s2 ^= AES0[BYTE(X.s2, 0)];
key.s3 ^= AES0[BYTE(X.s3, 0)]; key.s3 ^= AES0[BYTE(X.s3, 0)];
key.s0 ^= AES2[BYTE(X.s2, 2)]; key.s0 ^= AES2[BYTE(X.s2, 2)];
key.s1 ^= AES2[BYTE(X.s3, 2)]; key.s1 ^= AES2[BYTE(X.s3, 2)];
key.s2 ^= AES2[BYTE(X.s0, 2)]; key.s2 ^= AES2[BYTE(X.s0, 2)];
key.s3 ^= AES2[BYTE(X.s1, 2)]; key.s3 ^= AES2[BYTE(X.s1, 2)];
key.s0 ^= AES1[BYTE(X.s1, 1)]; key.s0 ^= AES1[BYTE(X.s1, 1)];
key.s1 ^= AES1[BYTE(X.s2, 1)]; key.s1 ^= AES1[BYTE(X.s2, 1)];
key.s2 ^= AES1[BYTE(X.s3, 1)]; key.s2 ^= AES1[BYTE(X.s3, 1)];
key.s3 ^= AES1[BYTE(X.s0, 1)]; key.s3 ^= AES1[BYTE(X.s0, 1)];
key.s0 ^= AES3[BYTE(X.s3, 3)]; key.s0 ^= AES3[BYTE(X.s3, 3)];
key.s1 ^= AES3[BYTE(X.s0, 3)]; key.s1 ^= AES3[BYTE(X.s0, 3)];
key.s2 ^= AES3[BYTE(X.s1, 3)]; key.s2 ^= AES3[BYTE(X.s1, 3)];
key.s3 ^= AES3[BYTE(X.s2, 3)]; key.s3 ^= AES3[BYTE(X.s2, 3)];
return key;
}
uint4 AES_Round2(const __local uint *AES0, const __local uint *AES1, const uint4 X, uint4 key)
{
key.s0 ^= AES0[BYTE(X.s0, 0)];
key.s1 ^= AES0[BYTE(X.s1, 0)];
key.s2 ^= AES0[BYTE(X.s2, 0)];
key.s3 ^= AES0[BYTE(X.s3, 0)];
key.s0 ^= rotate(AES0[BYTE(X.s2, 2)] ^ AES1[BYTE(X.s3, 3)], 16u);
key.s1 ^= rotate(AES0[BYTE(X.s3, 2)] ^ AES1[BYTE(X.s0, 3)], 16u);
key.s2 ^= rotate(AES0[BYTE(X.s0, 2)] ^ AES1[BYTE(X.s1, 3)], 16u);
key.s3 ^= rotate(AES0[BYTE(X.s1, 2)] ^ AES1[BYTE(X.s2, 3)], 16u);
key.s0 ^= AES1[BYTE(X.s1, 1)];
key.s1 ^= AES1[BYTE(X.s2, 1)];
key.s2 ^= AES1[BYTE(X.s3, 1)];
key.s3 ^= AES1[BYTE(X.s0, 1)];
return key; return key;
} }
#endif #endif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment