Skip to content
Snippets Groups Projects
Commit efc8dc44 authored by psychocrypt's avatar psychocrypt
Browse files

optimize aes round command queue

unroll AES round by hand to help the compiler to optimize the command queue
parent 2920e9a3
No related branches found
No related tags found
No related merge requests found
......@@ -74,15 +74,29 @@ static const __constant uint AES0_C[256] =
/* BYTE(x, y): 8-bit field of x starting at bit offset y * 8 (y << 3),
 * i.e. byte number y of a 32-bit word counted from the least significant byte.
 * NOTE(review): relies on amd_bfe being the AMD bit-field-extract built-in
 * with (value, offset, width) arguments - confirm the extension is enabled. */
#define BYTE(x, y) (amd_bfe((x), (y) << 3U, 8U))
/*
 * AES_Round - one table-driven round over the four-word state X.
 *
 * AES0..AES3: lookup tables in __local memory. Each is indexed with an
 *             8-bit value produced by BYTE(), so entries 0..255 are read.
 * X:          input state, four 32-bit words (s0..s3); not modified.
 * key:        round key. Passed by value and reused as the accumulator,
 *             so the caller's copy is unaffected.
 *
 * Returns, for each output word i (word indices taken modulo 4):
 *
 *   key[i] ^ AES0[BYTE(X[i],   0)] ^ AES1[BYTE(X[i+1], 1)]
 *          ^ AES2[BYTE(X[i+2], 2)] ^ AES3[BYTE(X[i+3], 3)]
 *
 * The lookups are unrolled by hand, one table access per statement and
 * four independent accesses per group, to help the compiler optimize the
 * command queue. XOR is commutative, so the grouping order
 * (AES0, AES2, AES1, AES3) does not affect the result.
 */
uint4 AES_Round(const __local uint *AES0, const __local uint *AES1, const __local uint *AES2, const __local uint *AES3, const uint4 X, uint4 key)
{
	/* Byte 0 of each word through table 0. */
	key.s0 ^= AES0[BYTE(X.s0, 0)];
	key.s1 ^= AES0[BYTE(X.s1, 0)];
	key.s2 ^= AES0[BYTE(X.s2, 0)];
	key.s3 ^= AES0[BYTE(X.s3, 0)];

	/* Byte 2 of the word two positions ahead through table 2. */
	key.s0 ^= AES2[BYTE(X.s2, 2)];
	key.s1 ^= AES2[BYTE(X.s3, 2)];
	key.s2 ^= AES2[BYTE(X.s0, 2)];
	key.s3 ^= AES2[BYTE(X.s1, 2)];

	/* Byte 1 of the word one position ahead through table 1. */
	key.s0 ^= AES1[BYTE(X.s1, 1)];
	key.s1 ^= AES1[BYTE(X.s2, 1)];
	key.s2 ^= AES1[BYTE(X.s3, 1)];
	key.s3 ^= AES1[BYTE(X.s0, 1)];

	/* Byte 3 of the word three positions ahead through table 3. */
	key.s0 ^= AES3[BYTE(X.s3, 3)];
	key.s1 ^= AES3[BYTE(X.s0, 3)];
	key.s2 ^= AES3[BYTE(X.s1, 3)];
	key.s3 ^= AES3[BYTE(X.s2, 3)];

	return key;
}
#endif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment