Skip to content
Snippets Groups Projects
Unverified Commit dc267662 authored by fireice-uk's avatar fireice-uk Committed by GitHub
Browse files

Merge pull request #2229 from psychocrypt/fix-brokenBlockchainDriverOpenCL

OpenCl: fix blockchain driver support
parents 70bb59e1 3f1cea0f
No related branches found
No related tags found
No related merge requests found
......@@ -193,12 +193,6 @@ static const __constant float ccnt[16] = {
1.4609375f
};
struct SharedMemChunk
{
int4 out[16];
float4 va[16];
};
__attribute__((reqd_work_group_size(WORKSIZE * 16, 1, 1)))
__kernel void JOIN(cn1_cn_gpu,ALGO)(__global int *lpad_in, __global int *spad, uint numThreads)
{
......@@ -215,8 +209,11 @@ __kernel void JOIN(cn1_cn_gpu,ALGO)(__global int *lpad_in, __global int *spad, u
__global int* lpad = (__global int*)((__global char*)lpad_in + MEMORY * (gIdx/16));
#endif
__local struct SharedMemChunk smem_in[WORKSIZE];
__local struct SharedMemChunk* smem = smem_in + chunk;
__local int4 smemOutShared[16 * WORKSIZE];
__local float4 smemVaShared[16 * WORKSIZE];
__local int4* smemOut = smemOutShared + 16 * chunk;
__local float4* smemVa = smemVaShared + 16 * chunk;
uint tid = get_local_id(0) % 16;
......@@ -235,50 +232,50 @@ __kernel void JOIN(cn1_cn_gpu,ALGO)(__global int *lpad_in, __global int *spad, u
{
mem_fence(CLK_LOCAL_MEM_FENCE);
int tmp = ((__global int*)scratchpad_ptr(s, tidd, lpad))[tidm];
((__local int*)smem)[tid] = tmp;
((__local int*)smemOut)[tid] = tmp;
mem_fence(CLK_LOCAL_MEM_FENCE);
{
single_comupte_wrap(
tidm,
*(smem->out + look[tid][0]),
*(smem->out + look[tid][1]),
*(smem->out + look[tid][2]),
*(smem->out + look[tid][3]),
ccnt[tid], vs, smem->va + tid,
smem->out + tid
*(smemOut + look[tid][0]),
*(smemOut + look[tid][1]),
*(smemOut + look[tid][2]),
*(smemOut + look[tid][3]),
ccnt[tid], vs, smemVa + tid,
smemOut + tid
);
}
mem_fence(CLK_LOCAL_MEM_FENCE);
int outXor = ((__local int*)smem->out)[block];
int outXor = ((__local int*)smemOut)[block];
for(uint dd = block + 4; dd < (tidd + 1) * 16; dd += 4)
outXor ^= ((__local int*)smem->out)[dd];
outXor ^= ((__local int*)smemOut)[dd];
((__global int*)scratchpad_ptr(s, tidd, lpad))[tidm] = outXor ^ tmp;
((__local int*)smem->out)[tid] = outXor;
((__local int*)smemOut)[tid] = outXor;
float va_tmp1 = ((__local float*)smem->va)[block] + ((__local float*)smem->va)[block + 4];
float va_tmp2 = ((__local float*)smem->va)[block+ 8] + ((__local float*)smem->va)[block + 12];
((__local float*)smem->va)[tid] = va_tmp1 + va_tmp2;
float va_tmp1 = ((__local float*)smemVa)[block] + ((__local float*)smemVa)[block + 4];
float va_tmp2 = ((__local float*)smemVa)[block+ 8] + ((__local float*)smemVa)[block + 12];
((__local float*)smemVa)[tid] = va_tmp1 + va_tmp2;
mem_fence(CLK_LOCAL_MEM_FENCE);
int out2 = ((__local int*)smem->out)[tid] ^ ((__local int*)smem->out)[tid + 4 ] ^ ((__local int*)smem->out)[tid + 8] ^ ((__local int*)smem->out)[tid + 12];
va_tmp1 = ((__local float*)smem->va)[block] + ((__local float*)smem->va)[block + 4];
va_tmp2 = ((__local float*)smem->va)[block + 8] + ((__local float*)smem->va)[block + 12];
int out2 = ((__local int*)smemOut)[tid] ^ ((__local int*)smemOut)[tid + 4 ] ^ ((__local int*)smemOut)[tid + 8] ^ ((__local int*)smemOut)[tid + 12];
va_tmp1 = ((__local float*)smemVa)[block] + ((__local float*)smemVa)[block + 4];
va_tmp2 = ((__local float*)smemVa)[block + 8] + ((__local float*)smemVa)[block + 12];
va_tmp1 = va_tmp1 + va_tmp2;
va_tmp1 = fabs(va_tmp1);
float xx = va_tmp1 * 16777216.0f;
int xx_int = (int)xx;
((__local int*)smem->out)[tid] = out2 ^ xx_int;
((__local float*)smem->va)[tid] = va_tmp1 / 64.0f;
((__local int*)smemOut)[tid] = out2 ^ xx_int;
((__local float*)smemVa)[tid] = va_tmp1 / 64.0f;
mem_fence(CLK_LOCAL_MEM_FENCE);
vs = smem->va[0];
s = smem->out->x ^ smem->out->y ^ smem->out->z ^ smem->out->w;
vs = smemVa[0];
s = smemOut->x ^ smemOut->y ^ smemOut->z ^ smemOut->w;
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment