diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3b3c7eb6bdcf248e63c74aa428fc71b26b4235b4..15a2684ca18cec152038814768c56dbacb5add5e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,10 +36,6 @@ if(NOT CMAKE_BUILD_TYPE)
 endif()
 set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "${BUILD_TYPE}")
 
-set(XMR-STAK_CURRENCY "all" CACHE STRING "select miner currency")
-set_property(CACHE XMR-STAK_CURRENCY PROPERTY STRINGS "all;monero;aeon")
-
-
 set(XMR-STAK_COMPILE "native" CACHE STRING "select CPU compute architecture")
 set_property(CACHE XMR-STAK_COMPILE PROPERTY STRINGS "native;generic")
 if(XMR-STAK_COMPILE STREQUAL "native")
@@ -53,16 +49,6 @@ else()
     message(FATAL_ERROR "XMR-STAK_COMPILE is set to an unknown value '${XMR-STAK_COMPILE}'")
 endif()
 
-if(XMR-STAK_CURRENCY STREQUAL "all")
-    message(STATUS "Set miner currency to 'monero' and 'aeon'")
-elseif(XMR-STAK_CURRENCY STREQUAL "aeon")
-    message(STATUS "Set miner currency to 'aeon'")
-    add_definitions("-DCONF_NO_MONERO=1")
-elseif(XMR-STAK_CURRENCY STREQUAL "monero")
-    message(STATUS "Set miner currency to 'monero'")
-    add_definitions("-DCONF_NO_AEON=1")
-endif()
-
 # option to add static libgcc and libstdc++
 option(CMAKE_LINK_STATIC "link as much as possible libraries static" OFF)
 
@@ -438,6 +424,14 @@ else()
 
 endif()
 
+# add -Wall for debug builds with gcc
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall")
+    endif()
+endif()
+
 # activate static libgcc and libstdc++ linking
 if(CMAKE_LINK_STATIC)
     set(BUILD_SHARED_LIBRARIES OFF)
diff --git a/README.md b/README.md
index bdcc6cf720cd4ca1cc10689601a720cedbd5a774..d1ec2554497458769e58c0406a3e6b5b91ba4380 100644
--- a/README.md
+++ b/README.md
@@ -38,14 +38,20 @@ XMR-Stak is a universal Stratum pool miner. This miner supports CPUs, AMD and NV
 
 ## Supported altcoins
 
-Besides Monero, following coins can be mined using this miner:
+Besides [Monero](https://getmonero.org), following coins can be mined using this miner:
 
-- [Aeon](http://www.aeon.cash/)
+- [Aeon](http://www.aeon.cash)
+- [Edollar](https://edollar.cash)
 - [Electroneum](https://electroneum.com)
+- [Graft](https://www.graft.network)
 - [Intense](https://intensecoin.com)
+- [Karbo](https://karbo.io)
 - [Sumokoin](https://www.sumokoin.org)
 
-For all coins, except Aeon, you can use Monero settings.
+If your prefered coin is not listed, you can chose one of the following algorithms:
+
+- Cryptonight - 2 MiB scratchpad memory
+- Cryptonight-light - 1 MiB scratchpad memory
 
 Please note, this list is not complete, and is not an endorsement.
 
diff --git a/doc/FAQ.md b/doc/FAQ.md
index ffbc36fcc28dcc6d82e2303f3a80ae679a35179d..f22bb53532a6354f243f38764c0c5e6d4d8d73b4 100644
--- a/doc/FAQ.md
+++ b/doc/FAQ.md
@@ -8,6 +8,9 @@
 * [Illegal instruction (core dumped)](#illegal-instruction)
 * [Virus Protection Alert](#virus-protection-alert)
 * [Change Currency to Mine](#change-currency-to-mine)
+* [How can I mine Monero](#how-can-i-mine-monero)
+* [Why is Monero named monero2](why-is-monero-named-monero2)
+* [Which currency must be chosen if my fork coin is not listed](#which-currency-must-be-chosen-if-my-fork-coin-is-not-listed)
 
 ## "Obtaining SeLockMemoryPrivilege failed."
 
@@ -70,3 +73,16 @@ If your antivirus software flags **xmr-stak**, it will likely move it to its qua
 If the miner is compiled for Monero and Aeon than you can change
  - the value `currency` in the config *or*
  - start the miner with the [command line option](usage.md) `--currency monero` or `--currency aeon`
+
+## How can I mine Monero
+
+Set the value `currency` in `pools.txt` to `monero2`.
+
+## Why is Monero named monero2
+
+To avoid configuration conflicts after the hard fork of Monero to the new POW with our old naming schema where all cryptonight currencies was selected by choosing `monero` as currency we decided to switch to the name `monero2`.
+
+## Which currency must be chosen if my fork coin is not listed
+
+If your coin you want to mine is not listed please check the documentation of the coin and try to find out if `cryptonight` or `cryptonight-lite` is the used algorithm.
+Select one of these generic coin algorithms.
diff --git a/doc/compile.md b/doc/compile.md
index 771c9d1a4fbd626398aa167cce2d739556d0ab54..984c013ac45dd6c111dd942ca6202e2ead80338d 100644
--- a/doc/compile.md
+++ b/doc/compile.md
@@ -47,7 +47,6 @@ After the configuration you need to compile the miner, follow the guide for your
   - there is no *http* interface available if option is disabled: `cmake .. -DMICROHTTPD_ENABLE=OFF`
 - `OpenSSL_ENABLE` allow to disable/enable the dependency *OpenSSL*
   - it is not possible to connect to a *https* secured pool if option is disabled: `cmake .. -DOpenSSL_ENABLE=OFF`
-- `XMR-STAK_CURRENCY` - compile for Monero(XMR) or Aeon(AEON) usage only e.g. `cmake .. -DXMR-STAK_CURRENCY=monero`
 - `XMR-STAK_COMPILE` select the CPU compute architecture (default: native)
   - native means the miner binary can be used only on the system where it is compiled but will archive the highest hash rate
   - use `cmake .. -DXMR-STAK_COMPILE=generic` to run the miner on all CPU's with sse2
diff --git a/doc/tuning.md b/doc/tuning.md
index 5125387d0b7769caae06949d05c17002796df3a4..47ad0bb2c4948c97081b62ec90cb114af2107a75 100644
--- a/doc/tuning.md
+++ b/doc/tuning.md
@@ -1,6 +1,7 @@
 # Tuning Guide
 
 ## Content Overview
+* [Benchmark](#benchmark)
 * [Windows](#windows)
 * [NVIDIA Backend](#nvidia-backend)
   * [Choose Value for `threads` and `blocks`](#choose-value-for-threads-and-blocks)
@@ -8,11 +9,18 @@
 * [AMD Backend](#amd-backend)
   * [Choose `intensity` and `worksize`](#choose-intensity-and-worksize)
   * [Add more GPUs](#add-more-gpus)
+  * [disable comp_mode](#disable-comp_mode)
+  * [change the scratchpad memory pattern](change-the-scratchpad-memory-pattern)
   * [Increase Memory Pool](#increase-memory-pool)
   * [Scratchpad Indexing](#scratchpad-indexing)
 * [CPU Backend](#cpu-backend)
   * [Choose Value for `low_power_mode`](#choose-value-for-low_power_mode)
 
+## Benchmark
+To benchmark the miner speed there are two ways.
+  - Mine against a pool end press the key `h` after 30 sec to see the hash report.
+  - Start the miner with the cli option `--benchmark BLOCKVERSION`. The miner will not connect to any pool and performs a 60sec performance benchmark with all enabled back-ends.
+
 ## Windows
 "Run As Administrator" prompt (UAC) confirmation is needed to use large pages on Windows 7.
 On Windows 10 it is only needed once to set up the account to use them.
@@ -46,8 +54,12 @@ To add a new GPU you need to add a new config set to `gpu_threads_conf`.
 ```
 "gpu_threads_conf" :
 [
-    { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" :  0, "affine_to_cpu" : false},
-    { "index" : 1, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" :  0, "affine_to_cpu" : false},
+    { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" :  0,
+      "affine_to_cpu" : false, "sync_mode" : 3,
+    },
+    { "index" : 1, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" :  0,
+      "affine_to_cpu" : false, "sync_mode" : 3,
+    },
 ],
 ```
 
@@ -70,13 +82,26 @@ If you are unsure of either GPU or platform index value, you can use `clinfo` to
 ```
 "gpu_threads_conf" :
 [
-    { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false },
-    { "index" : 1, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false },
+    { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false, 
+      "strided_index" : true, "mem_chunk" : 2, "comp_mode" : true
+    },
+    { "index" : 1, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false, 
+      "strided_index" : true, "mem_chunk" : 2, "comp_mode" : true
+    },
 ],
 
 "platform_index" : 0,
 ```
 
+### disable comp_mode
+
+`comp_mode` means compatibility mode and removes some checks in compute kernel those takes care that the miner can be used on a wide range of AMD/OpenCL GPU devices.
+To avoid miner crashes the `intensity` should be a multiple of `worksize` if `comp_mode` is `false`.
+
+### change the scratchpad memory pattern
+
+By changing `strided_index` to `2` the number of contiguous elements (a 16 byte) for one miner thread can be fine tuned with the option `mem_chunk`.
+
 ### Increase Memory Pool
 
 By setting the following environment variables before the miner is started OpenCl allows the miner to more threads.
@@ -84,9 +109,9 @@ This variables must be set each time before the miner is started else it could b
 
 ```
 export GPU_FORCE_64BIT_PTR=1
-export GPU_MAX_HEAP_SIZE=99
-export GPU_MAX_ALLOC_PERCENT=99
-export GPU_SINGLE_ALLOC_PERCENT=99
+export GPU_MAX_HEAP_SIZE=100
+export GPU_MAX_ALLOC_PERCENT=100
+export GPU_SINGLE_ALLOC_PERCENT=100
 ```
 
 *Note:* Windows user must use `set` instead of `export` to define an environment variable.
diff --git a/doc/usage.md b/doc/usage.md
index a81046931ff690d5d0791eaa61a8dabc72bc4758..1f1fb096155a1cb9a5ced42276d8254840ab1bfc 100644
--- a/doc/usage.md
+++ b/doc/usage.md
@@ -5,7 +5,7 @@
 * [Usage on Windows](#usage-on-windows)
 * [Usage on Linux](#usage-on-linux)
 * [Command Line Options](#command-line-options)
-* [HTML and JSON API report configuraton](#xx)
+* [HTML and JSON API report configuraton](#html-and-json-api-report-configuraton)
 
 ## Configurations
 
@@ -13,12 +13,13 @@ Before you started the miner the first time there are no config files available.
 Config files will be created at the first start.
 The number of files depends on the available backends.
 `config.txt` contains the common miner settings.
+`pools.txt` contains the selected mining pools and currency to mine.
 `amd.txt`, `cpu.txt` and `nvidia.txt` contains miner backend specific settings and can be used for further tuning ([Tuning Guide](tuning.md)).
 
 
 ## Usage on Windows
 1) Double click the `xmr-stak.exe` file
-2) Fill in the pool url, username and password
+2) Fill in the pool url settings, currency, username and password
 
 `set XMRSTAK_NOWAIT=1` disable the dialog `Press any key to exit.` for non UAC execution.
 
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index c45f211cde6bdccbf4f1fd239d6bb9ad23d50ce2..7547083bad8b78cd8f575ea249ab865e647e9d38 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -306,21 +306,9 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
 		return ERR_OCL_API;
 	}
 
-	size_t hashMemSize;
-	int threadMemMask;
-	int hasIterations;
-	if(::jconf::inst()->IsCurrencyMonero())
-	{
-		hashMemSize = MONERO_MEMORY;
-		threadMemMask = MONERO_MASK;
-		hasIterations = MONERO_ITER;
-	}
-	else
-	{
-		hashMemSize = AEON_MEMORY;
-		threadMemMask = AEON_MASK;
-		hasIterations = AEON_ITER;
-	}
+	size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
+	int threadMemMask = cn_select_mask(::jconf::inst()->GetMiningAlgo());
+	int hashIterations = cn_select_iter(::jconf::inst()->GetMiningAlgo());
 
 	size_t g_thd = ctx->rawIntensity;
 	ctx->ExtraBuffers[0] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, hashMemSize * g_thd, NULL, &ret);
@@ -384,11 +372,13 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
 		return ERR_OCL_API;
 	}
 
-	char options[256];
-	snprintf(options, sizeof(options),
-		"-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK_EXPONENT=%d  -DCOMP_MODE=%d",
-		hasIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<<ctx->memChunk), ctx->compMode ? 1 : 0);
+	auto miner_algo = ::jconf::inst()->GetMiningAlgo();
 
+	char options[512];
+	snprintf(options, sizeof(options),
+		"-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK_EXPONENT=%d  -DCOMP_MODE=%d -DMEMORY=%llu -DALGO=%d",
+		hashIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<<ctx->memChunk), ctx->compMode ? 1 : 0, 
+		int_port(hashMemSize), int(miner_algo));
 	/* create a hash for the compile time cache
 	 * used data:
 	 *   - source code
@@ -529,8 +519,8 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
 		}
 	}
 
-	const char *KernelNames[] = { "cn0", "cn1", "cn2", "Blake", "Groestl", "JH", "Skein" };
-	for(int i = 0; i < 7; ++i)
+	const char *KernelNames[] = { "cn0", "cn1", "cn2", "Blake", "Groestl", "JH", "Skein", "cn1_monero" };
+	for(int i = 0; i < 8; ++i)
 	{
 		ctx->Kernels[i] = clCreateKernel(ctx->Program, KernelNames[i], &ret);
 		if(ret != CL_SUCCESS)
@@ -887,7 +877,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
 	return ERR_SUCCESS;
 }
 
-size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target)
+size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, xmrstak_algo miner_algo, uint32_t version)
 {
 	cl_int ret;
 
@@ -932,29 +922,65 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
 		return(ERR_OCL_API);
 	}
 
-	// CN2 Kernel
+	if(miner_algo == cryptonight_heavy)
+	{
+		// version
+		if ((ret = clSetKernelArg(ctx->Kernels[0], 4, sizeof(cl_uint), &version)) != CL_SUCCESS)
+		{
+			printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 0, argument 4.", err_to_str(ret));
+			return ERR_OCL_API;
+		}
+	}
+
+	// CN1 Kernel
+
+	/// @todo only activate if currency is monero
+	int cn_kernel_offset = 0;
+	if(miner_algo == cryptonight_monero && version >= 7)
+	{
+		cn_kernel_offset = 6;
+	}
 
 	// Scratchpads
-	if((ret = clSetKernelArg(ctx->Kernels[1], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
+	if((ret = clSetKernelArg(ctx->Kernels[1 + cn_kernel_offset], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
 	{
 		printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 0.", err_to_str(ret));
 		return ERR_OCL_API;
 	}
 
 	// States
-	if((ret = clSetKernelArg(ctx->Kernels[1], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
+	if((ret = clSetKernelArg(ctx->Kernels[1 + cn_kernel_offset], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
 	{
 		printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 1.", err_to_str(ret));
 		return ERR_OCL_API;
 	}
 
 	// Threads
-	if((ret = clSetKernelArg(ctx->Kernels[1], 2, sizeof(cl_ulong), &numThreads)) != CL_SUCCESS)
+	if((ret = clSetKernelArg(ctx->Kernels[1 + cn_kernel_offset], 2, sizeof(cl_ulong), &numThreads)) != CL_SUCCESS)
 	{
 		printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 2.", err_to_str(ret));
 		return(ERR_OCL_API);
 	}
 
+	if(miner_algo == cryptonight_monero && version >= 7)
+	{
+		// Input
+		if ((ret = clSetKernelArg(ctx->Kernels[1 + cn_kernel_offset], 3, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS)
+		{
+			printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 1, arugment 4(input buffer).", err_to_str(ret));
+			return ERR_OCL_API;
+		}
+	}
+	else if(miner_algo == cryptonight_heavy)
+	{
+		// version
+		if ((ret = clSetKernelArg(ctx->Kernels[1], 3, sizeof(cl_uint), &version)) != CL_SUCCESS)
+		{
+			printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 1, argument 3 (version).", err_to_str(ret));
+			return ERR_OCL_API;
+		}
+	}
+
 	// CN3 Kernel
 	// Scratchpads
 	if((ret = clSetKernelArg(ctx->Kernels[2], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
@@ -1005,6 +1031,16 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
 		return(ERR_OCL_API);
 	}
 
+	if(miner_algo == cryptonight_heavy)
+	{
+		// version
+		if ((ret = clSetKernelArg(ctx->Kernels[2], 7, sizeof(cl_uint), &version)) != CL_SUCCESS)
+		{
+			printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 2, argument 7.", err_to_str(ret));
+			return ERR_OCL_API;
+		}
+	}
+
 	for(int i = 0; i < 4; ++i)
 	{
 		// States
@@ -1039,7 +1075,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
 	return ERR_SUCCESS;
 }
 
-size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput)
+size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo, uint32_t version)
 {
 	cl_int ret;
 	cl_uint zero = 0;
@@ -1092,7 +1128,13 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput)
 	}*/
 
 	size_t tmpNonce = ctx->Nonce;
-	if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[1], 1, &tmpNonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
+	/// @todo only activate if currency is monero
+	int cn_kernel_offset = 0;
+	if(miner_algo == cryptonight_monero && version >= 7)
+	{
+		cn_kernel_offset = 6;
+	}
+	if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[1 + cn_kernel_offset], 1, &tmpNonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
 	{
 		printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1);
 		return ERR_OCL_API;
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp
index 8fb71688798199dd89719f1aa1885542f5617005..a387b1535dd0b2c701e8d6043c7d7eac45b45eaf 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.hpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "xmrstak/misc/console.hpp"
+#include "xmrstak/jconf.hpp"
 
 #if defined(__APPLE__)
 #include <OpenCL/cl.h>
@@ -35,7 +36,7 @@ struct GpuContext
 	cl_mem OutputBuffer;
 	cl_mem ExtraBuffers[6];
 	cl_program Program;
-	cl_kernel Kernels[7];
+	cl_kernel Kernels[8];
 	size_t freeMem;
 	int computeUnits;
 	std::string name;
@@ -49,7 +50,7 @@ int getAMDPlatformIdx();
 std::vector<GpuContext> getAMDDevices(int index);
 
 size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx);
-size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target);
-size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput);
+size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, xmrstak_algo miner_algo, uint32_t version);
+size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo, uint32_t version);
 
 
diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
index 9383b04905406a7f963369c7261a459ef47c9057..7a363570bad7dbe7c32656d176819de4b479d8ac 100644
--- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
+++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
@@ -433,8 +433,18 @@ inline ulong getIdx()
 #endif
 }
 
+inline uint4 mix_and_propagate(__local uint4 xin[8][WORKSIZE])
+{
+	return xin[(get_local_id(1)) % 8][get_local_id(0)] ^ xin[(get_local_id(1) + 1) % 8][get_local_id(0)];
+}
+
 __attribute__((reqd_work_group_size(WORKSIZE, 8, 1)))
-__kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, ulong Threads)
+__kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, ulong Threads
+// cryptonight_heavy
+#if (ALGO == 4)
+		, uint version
+#endif
+)
 {
 	ulong State[25];
 	uint ExpandedKey1[40];
@@ -464,11 +474,11 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
 		states += 25 * gIdx;
 
 #if(STRIDED_INDEX==0)
-		Scratchpad += gIdx * (ITERATIONS >> 2);
+		Scratchpad += gIdx * (MEMORY >> 4);
 #elif(STRIDED_INDEX==1)
 		Scratchpad += gIdx;
 #elif(STRIDED_INDEX==2)
-		Scratchpad += get_group_id(0) * (ITERATIONS >> 2) * WORKSIZE + MEM_CHUNK * get_local_id(0);
+		Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + MEM_CHUNK * get_local_id(0);
 #endif
 
 		((ulong8 *)State)[0] = vload8(0, input);
@@ -507,13 +517,41 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
 	}
 
 	mem_fence(CLK_LOCAL_MEM_FENCE);
+		
+// cryptonight_heavy
+#if (ALGO == 4)
+	if(version >= 3)
+	{
+		__local uint4 xin[8][WORKSIZE];
+
+		/* Also left over threads performe this loop.
+		 * The left over thread results will be ignored
+		 */
+		for(size_t i=0; i < 16; i++)
+		{
+			#pragma unroll
+			for(int j = 0; j < 10; ++j)
+				text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey1)[j]);
+			barrier(CLK_LOCAL_MEM_FENCE);
+			xin[get_local_id(1)][get_local_id(0)] = text;
+			barrier(CLK_LOCAL_MEM_FENCE);
+			text = mix_and_propagate(xin);
+		}
+	}
+#endif
+
 #if(COMP_MODE==1)
 	// do not use early return here
 	if(gIdx < Threads)
 #endif
 	{
+		int iterations = MEMORY >> 7;
+#if (ALGO == 4)
+		if(version < 3)
+			iterations >>= 1;
+#endif
 		#pragma unroll 2
-		for(int i = 0; i < (ITERATIONS >> 5); ++i)
+		for(int i = 0; i < iterations; ++i)
 		{
 			#pragma unroll
 			for(int j = 0; j < 10; ++j)
@@ -525,8 +563,22 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
 	mem_fence(CLK_GLOBAL_MEM_FENCE);
 }
 
+#define VARIANT1_1(p) \
+	uint table = 0x75310U; \
+	uint index = (((p).s2 >> 26) & 12) | (((p).s2 >> 23) & 2); \
+	(p).s2 ^= ((table >> index) & 0x30U) << 24
+
+#define VARIANT1_2(p) ((uint2 *)&(p))[0] ^= tweak1_2
+
+#define VARIANT1_INIT() \
+	tweak1_2 = as_uint2(input[4]); \
+	tweak1_2.s0 >>= 24; \
+	tweak1_2.s0 |= tweak1_2.s1 << 8; \
+	tweak1_2.s1 = get_global_id(0); \
+	tweak1_2 ^= as_uint2(states[24])
+
 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Threads)
+__kernel void cn1_monero(__global uint4 *Scratchpad, __global ulong *states, ulong Threads, __global ulong *input)
 {
 	ulong a[2], b[2];
 	__local uint AES0[256], AES1[256], AES2[256], AES3[256];
@@ -544,6 +596,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre
 
 	barrier(CLK_LOCAL_MEM_FENCE);
 
+    uint2 tweak1_2;
 	uint4 b_x;
 #if(COMP_MODE==1)
 	// do not use early return here
@@ -552,11 +605,11 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre
 	{
 		states += 25 * gIdx;
 #if(STRIDED_INDEX==0)
-		Scratchpad += gIdx * (ITERATIONS >> 2);
+		Scratchpad += gIdx * (MEMORY >> 4);
 #elif(STRIDED_INDEX==1)
 		Scratchpad += gIdx;
 #elif(STRIDED_INDEX==2)
-		Scratchpad += get_group_id(0) * (ITERATIONS >> 2) * WORKSIZE + MEM_CHUNK * get_local_id(0);
+		Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + MEM_CHUNK * get_local_id(0);
 #endif
 
 		a[0] = states[0] ^ states[4];
@@ -565,6 +618,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre
 		b[1] = states[3] ^ states[7];
 
 		b_x = ((uint4 *)b)[0];
+	    VARIANT1_INIT();
 	}
 
 	mem_fence(CLK_LOCAL_MEM_FENCE);
@@ -581,9 +635,10 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre
 
 			((uint4 *)c)[0] = Scratchpad[IDX((a[0] & MASK) >> 4)];
 			((uint4 *)c)[0] = AES_Round(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]);
-			//b_x ^= ((uint4 *)c)[0];
 
-			Scratchpad[IDX((a[0] & MASK) >> 4)] = b_x ^ ((uint4 *)c)[0];
+			b_x ^= ((uint4 *)c)[0];
+			VARIANT1_1(b_x);
+			Scratchpad[IDX((a[0] & MASK) >> 4)] = b_x;
 
 			uint4 tmp;
 			tmp = Scratchpad[IDX((c[0] & MASK) >> 4)];
@@ -591,18 +646,129 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Thre
 			a[1] += c[0] * as_ulong2(tmp).s0;
 			a[0] += mul_hi(c[0], as_ulong2(tmp).s0);
 
+			VARIANT1_2(a[1]);
 			Scratchpad[IDX((c[0] & MASK) >> 4)] = ((uint4 *)a)[0];
+			VARIANT1_2(a[1]);
+
+			((uint4 *)a)[0] ^= tmp;
+
+			b_x = ((uint4 *)c)[0];
+		}
+	}
+	mem_fence(CLK_GLOBAL_MEM_FENCE);
+}
+
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, ulong Threads
+// cryptonight_heavy
+#if (ALGO == 4)
+		, uint version
+#endif
+)
+{
+	ulong a[2], b[2];
+	__local uint AES0[256], AES1[256], AES2[256], AES3[256];
+
+	const ulong gIdx = getIdx();
+
+	for(int i = get_local_id(0); i < 256; i += WORKSIZE)
+	{
+		const uint tmp = AES0_C[i];
+		AES0[i] = tmp;
+		AES1[i] = rotate(tmp, 8U);
+		AES2[i] = rotate(tmp, 16U);
+		AES3[i] = rotate(tmp, 24U);
+	}
+
+	barrier(CLK_LOCAL_MEM_FENCE);
+
+	uint4 b_x;
+#if(COMP_MODE==1)
+	// do not use early return here
+	if(gIdx < Threads)
+#endif
+	{
+		states += 25 * gIdx;
+#if(STRIDED_INDEX==0)
+		Scratchpad += gIdx * (MEMORY >> 4);
+#elif(STRIDED_INDEX==1)
+		Scratchpad += gIdx;
+#elif(STRIDED_INDEX==2)
+		Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + MEM_CHUNK * get_local_id(0);
+#endif
+
+		a[0] = states[0] ^ states[4];
+		b[0] = states[2] ^ states[6];
+		a[1] = states[1] ^ states[5];
+		b[1] = states[3] ^ states[7];
+
+		b_x = ((uint4 *)b)[0];
+	}
+
+	mem_fence(CLK_LOCAL_MEM_FENCE);
+
+#if(COMP_MODE==1)
+	// do not use early return here
+	if(gIdx < Threads)
+#endif
+	{
+		ulong idx0 = a[0];
+		ulong mask = MASK;
+
+		int iterations = ITERATIONS;
+#if (ALGO == 4)
+		if(version < 3)
+		{
+			iterations <<= 1;
+			mask -= 0x200000;
+		}
+#endif
+		#pragma unroll 8
+		for(int i = 0; i < iterations; ++i)
+		{
+			ulong c[2];
+
+			((uint4 *)c)[0] = Scratchpad[IDX((idx0 & mask) >> 4)];
+			((uint4 *)c)[0] = AES_Round(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]);
+			//b_x ^= ((uint4 *)c)[0];
+
+			Scratchpad[IDX((idx0 & mask) >> 4)] = b_x ^ ((uint4 *)c)[0];
+
+			uint4 tmp;
+			tmp = Scratchpad[IDX((c[0] & mask) >> 4)];
+
+			a[1] += c[0] * as_ulong2(tmp).s0;
+			a[0] += mul_hi(c[0], as_ulong2(tmp).s0);
+
+			Scratchpad[IDX((c[0] & mask) >> 4)] = ((uint4 *)a)[0];
 
 			((uint4 *)a)[0] ^= tmp;
+			idx0 = a[0];
 
 			b_x = ((uint4 *)c)[0];
+// cryptonight_heavy
+#if (ALGO == 4)
+			if(version >= 3)
+			{
+				long n = *((__global long*)(Scratchpad + (IDX((idx0 & mask) >> 4))));
+				int d = ((__global int*)(Scratchpad + (IDX((idx0 & mask) >> 4))))[2];
+				long q = n / (d | 0x5);
+				*((__global long*)(Scratchpad + (IDX((idx0 & mask) >> 4)))) = n ^ q;
+				idx0 = d ^ q;
+			}
+#endif
 		}
 	}
 	mem_fence(CLK_GLOBAL_MEM_FENCE);
 }
 
 __attribute__((reqd_work_group_size(WORKSIZE, 8, 1)))
-__kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global uint *Branch0, __global uint *Branch1, __global uint *Branch2, __global uint *Branch3, ulong Threads)
+__kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global uint *Branch0, __global uint *Branch1, __global uint *Branch2, __global uint *Branch3, ulong Threads
+// cryptonight_heavy
+#if (ALGO == 4)
+	, uint version
+#endif
+		)
 {
 	__local uint AES0[256], AES1[256], AES2[256], AES3[256];
 	uint ExpandedKey2[40];
@@ -631,11 +797,11 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u
 	{
 		states += 25 * gIdx;
 #if(STRIDED_INDEX==0)
-		Scratchpad += gIdx * (ITERATIONS >> 2);
+		Scratchpad += gIdx * (MEMORY >> 4);
 #elif(STRIDED_INDEX==1)
 		Scratchpad += gIdx;
 #elif(STRIDED_INDEX==2)
-		Scratchpad += get_group_id(0) * (ITERATIONS >> 2) * WORKSIZE + MEM_CHUNK * get_local_id(0);
+		Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + MEM_CHUNK * get_local_id(0);
 #endif
 
 		#if defined(__Tahiti__) || defined(__Pitcairn__)
@@ -655,13 +821,67 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u
 
 	barrier(CLK_LOCAL_MEM_FENCE);
 
+#if (ALGO == 4)
+	__local uint4 xin[8][WORKSIZE];
+#endif
+
 #if(COMP_MODE==1)
 	// do not use early return here
 	if(gIdx < Threads)
 #endif
 	{
+		int iterations = MEMORY >> 7;
+#if (ALGO == 4)
+		if(version < 3)
+		{
+			iterations >>= 1;
+			#pragma unroll 2
+			for(int i = 0; i < iterations; ++i)
+			{
+				text ^= Scratchpad[IDX((i << 3) + get_local_id(1))];
+
+				#pragma unroll 10
+				for(int j = 0; j < 10; ++j)
+					text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]);
+			}
+		}
+		else
+		{
+			#pragma unroll 2
+			for(int i = 0; i < iterations; ++i)
+			{
+				text ^= Scratchpad[IDX((i << 3) + get_local_id(1))];
+
+				#pragma unroll 10
+				for(int j = 0; j < 10; ++j)
+					text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]);
+
+
+				barrier(CLK_LOCAL_MEM_FENCE);
+				xin[get_local_id(1)][get_local_id(0)] = text;
+				barrier(CLK_LOCAL_MEM_FENCE);
+				text = mix_and_propagate(xin);
+			}
+
+			#pragma unroll 2
+			for(int i = 0; i < iterations; ++i)
+			{
+				text ^= Scratchpad[IDX((i << 3) + get_local_id(1))];
+
+				#pragma unroll 10
+				for(int j = 0; j < 10; ++j)
+					text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]);
+
+
+				barrier(CLK_LOCAL_MEM_FENCE);
+				xin[get_local_id(1)][get_local_id(0)] = text;
+				barrier(CLK_LOCAL_MEM_FENCE);
+				text = mix_and_propagate(xin);
+			}
+		}
+#else
 		#pragma unroll 2
-		for(int i = 0; i < (ITERATIONS >> 5); ++i)
+		for(int i = 0; i < iterations; ++i)
 		{
 			text ^= Scratchpad[IDX((i << 3) + get_local_id(1))];
 
@@ -669,7 +889,34 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u
 			for(int j = 0; j < 10; ++j)
 				text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]);
 		}
+#endif
+	}
+
+// cryptonight_heavy
+#if (ALGO == 4)
+	if(version >= 3)
+	{
+		/* Also left over threads performe this loop.
+		 * The left over thread results will be ignored
+		 */
+		for(size_t i=0; i < 16; i++)
+		{
+			#pragma unroll
+			for(int j = 0; j < 10; ++j)
+				text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]);
+			barrier(CLK_LOCAL_MEM_FENCE);
+			xin[get_local_id(1)][get_local_id(0)] = text;
+			barrier(CLK_LOCAL_MEM_FENCE);
+			text = mix_and_propagate(xin);
+		}
+	}
+#endif
 
+#if(COMP_MODE==1)
+	// do not use early return here
+	if(gIdx < Threads)
+#endif
+	{
 		vstore2(as_ulong2(text), get_local_id(1) + 4, states);
 	}
 
diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp
index 895010503ff2fb8d672aa9eef287ba9d78fe84be..ea057a0f6f1e8e50a1d5681dd81625bc4def7a26 100644
--- a/xmrstak/backend/amd/autoAdjust.hpp
+++ b/xmrstak/backend/amd/autoAdjust.hpp
@@ -83,15 +83,7 @@ private:
 
 		constexpr size_t byteToMiB = 1024u * 1024u;
 
-		size_t hashMemSize;
-		if(::jconf::inst()->IsCurrencyMonero())
-		{
-			hashMemSize = MONERO_MEMORY;
-		}
-		else
-		{
-			hashMemSize = AEON_MEMORY;
-		}
+		size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
 
 		std::string conf;
 		for(auto& ctx : devVec)
@@ -118,7 +110,7 @@ private:
 				maxThreads = 2024u;
 			}
 			// increase all intensity limits by two for aeon
-			if(!::jconf::inst()->IsCurrencyMonero())
+			if(::jconf::inst()->GetMiningAlgo() == cryptonight_lite)
 				maxThreads *= 2u;
 
 			// keep 128MiB memory free (value is randomly chosen)
diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp
index 8dfbce55dd40c62cc4b0e7a64745bf0e26561999..46a04d5053921b04746cce162b124de04da5a595 100644
--- a/xmrstak/backend/amd/minethd.cpp
+++ b/xmrstak/backend/amd/minethd.cpp
@@ -191,9 +191,20 @@ void minethd::work_main()
 	uint64_t iCount = 0;
 	cryptonight_ctx* cpu_ctx;
 	cpu_ctx = cpu::minethd::minethd_alloc_ctx();
-	cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, ::jconf::inst()->IsCurrencyMonero());
+	auto miner_algo = ::jconf::inst()->GetMiningAlgo();
+	cn_hash_fun hash_fun;
+	if(miner_algo == cryptonight_monero || miner_algo == cryptonight_heavy)
+	{
+		// start with cryptonight and switch later if fork version is reached
+		hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight);
+	}
+	else
+		hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
+
 	globalStates::inst().iConsumeCnt++;
 
+	uint8_t version = 0;
+
 	while (bQuit == 0)
 	{
 		if (oWork.bStall)
@@ -207,6 +218,16 @@ void minethd::work_main()
 				std::this_thread::sleep_for(std::chrono::milliseconds(100));
 
 			consume_work();
+			uint8_t new_version = oWork.getVersion();
+			if(miner_algo == cryptonight_monero && version < 7 && new_version >= 7)
+			{
+				hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight_monero);
+			}
+			else if(miner_algo == cryptonight_heavy && version < 3 && new_version >= 3)
+			{
+				hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight_heavy);
+			}
+			version = new_version;
 			continue;
 		}
 
@@ -215,7 +236,8 @@ void minethd::work_main()
 
 		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
 		uint64_t target = oWork.iTarget;
-		XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target);
+		/// \todo add monero hard for version
+		XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target, miner_algo, version);
 
 		if(oWork.bNiceHash)
 			pGpuCtx->Nonce = *(uint32_t*)(oWork.bWorkBlob + 39);
@@ -231,7 +253,7 @@ void minethd::work_main()
 			cl_uint results[0x100];
 			memset(results,0,sizeof(cl_uint)*(0x100));
 
-			XMRRunJob(pGpuCtx, results);
+			XMRRunJob(pGpuCtx, results, miner_algo, version);
 
 			for(size_t i = 0; i < results[0xFF]; i++)
 			{
@@ -258,6 +280,16 @@ void minethd::work_main()
 		}
 
 		consume_work();
+		uint8_t new_version = oWork.getVersion();
+		if(miner_algo == cryptonight_monero && version < 7 && new_version >= 7)
+		{
+			hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight_monero);
+		}
+		else if(miner_algo == cryptonight_heavy && version < 3 && new_version >= 3)
+		{
+			hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight_heavy);
+		}
+		version = new_version;
 	}
 }
 
diff --git a/xmrstak/backend/cpu/autoAdjustHwloc.hpp b/xmrstak/backend/cpu/autoAdjustHwloc.hpp
index ddeb89b3199c921d0ab27276aa1dfeb8a5d5efda..568abb5cd6a86ef7b12fb8cd406bbb561ddbaa47 100644
--- a/xmrstak/backend/cpu/autoAdjustHwloc.hpp
+++ b/xmrstak/backend/cpu/autoAdjustHwloc.hpp
@@ -28,16 +28,8 @@ public:
 
 	autoAdjust()
 	{
-		if(::jconf::inst()->IsCurrencyMonero())
-		{
-			hashMemSize = MONERO_MEMORY;
-			halfHashMemSize = hashMemSize / 2u;
-		}
-		else
-		{
-			hashMemSize = AEON_MEMORY;
-			halfHashMemSize = hashMemSize / 2u;
-		}
+		hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
+		halfHashMemSize = hashMemSize / 2u;
 	}
 
 	bool printConfig()
diff --git a/xmrstak/backend/cpu/crypto/cryptonight.h b/xmrstak/backend/cpu/crypto/cryptonight.h
index 631c39a4a52d070527012a0a39c77479d56e9605..5c9a73332fc1f0435e6eec210ffa3d73981fcc4a 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight.h
+++ b/xmrstak/backend/cpu/crypto/cryptonight.h
@@ -7,8 +7,6 @@ extern "C" {
 
 #include <stddef.h>
 #include <inttypes.h>
-#include "xmrstak/backend/cryptonight.hpp"
-
 
 typedef struct {
 	uint8_t hash_state[224]; // Need only 200, explicit align
diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
index e4ccbc3845390df1084328187913fb5901b18591..85373e828eb605b17ca5d371f1130b816c75a3a7 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
+++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
@@ -16,6 +16,7 @@
 #pragma once
 
 #include "cryptonight.h"
+#include "xmrstak/backend/cryptonight.hpp"
 #include <memory.h>
 #include <stdio.h>
 
@@ -148,7 +149,20 @@ static inline void soft_aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i
 	*x7 = soft_aesenc(*x7, key);
 }
 
-template<size_t MEM, bool SOFT_AES, bool PREFETCH>
+inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3, __m128i& x4, __m128i& x5, __m128i& x6, __m128i& x7)
+{
+    __m128i tmp0 = x0;
+    x0 = _mm_xor_si128(x0, x1);
+    x1 = _mm_xor_si128(x1, x2);
+    x2 = _mm_xor_si128(x2, x3);
+    x3 = _mm_xor_si128(x3, x4);
+    x4 = _mm_xor_si128(x4, x5);
+    x5 = _mm_xor_si128(x5, x6);
+    x6 = _mm_xor_si128(x6, x7);
+    x7 = _mm_xor_si128(x7, tmp0);
+}
+
+template<size_t MEM, bool SOFT_AES, bool PREFETCH, xmrstak_algo ALGO>
 void cn_explode_scratchpad(const __m128i* input, __m128i* output)
 {
 	// This is more than we have registers, compiler will assign 2 keys on the stack
@@ -166,6 +180,40 @@ void cn_explode_scratchpad(const __m128i* input, __m128i* output)
 	xin6 = _mm_load_si128(input + 10);
 	xin7 = _mm_load_si128(input + 11);
 
+	if(ALGO == cryptonight_heavy)
+	{
+		for(size_t i=0; i < 16; i++)
+		{
+			if(SOFT_AES)
+			{
+				soft_aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				soft_aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				soft_aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				soft_aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				soft_aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				soft_aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				soft_aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				soft_aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				soft_aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				soft_aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+			}
+			else
+			{
+				aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+				aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+			}
+			mix_and_propagate(xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
+		}
+	}
+
 	for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
 	{
 		if(SOFT_AES)
@@ -213,7 +261,7 @@ void cn_explode_scratchpad(const __m128i* input, __m128i* output)
 	}
 }
 
-template<size_t MEM, bool SOFT_AES, bool PREFETCH>
+template<size_t MEM, bool SOFT_AES, bool PREFETCH, xmrstak_algo ALGO>
 void cn_implode_scratchpad(const __m128i* input, __m128i* output)
 {
 	// This is more than we have registers, compiler will assign 2 keys on the stack
@@ -275,6 +323,93 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output)
 			aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
 			aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
 		}
+
+		if(ALGO == cryptonight_heavy)
+			mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+	}
+
+	if(ALGO == cryptonight_heavy)
+	{
+		for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
+		{
+			if(PREFETCH)
+				_mm_prefetch((const char*)input + i + 0, _MM_HINT_NTA);
+
+			xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
+			xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
+			xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
+			xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
+
+			if(PREFETCH)
+				_mm_prefetch((const char*)input + i + 4, _MM_HINT_NTA);
+
+			xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
+			xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
+			xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
+			xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
+
+			if(SOFT_AES)
+			{
+				soft_aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+			}
+			else
+			{
+				aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+			}
+
+			if(ALGO == cryptonight_heavy)
+				mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+		}
+
+		for(size_t i=0; i < 16; i++)
+		{
+			if(SOFT_AES)
+			{
+				soft_aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				soft_aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+			}
+			else
+			{
+				aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+				aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+			}
+
+			mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+		}
 	}
 
 	_mm_store_si128(output + 4, xout0);
@@ -287,13 +422,45 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output)
 	_mm_store_si128(output + 11, xout7);
 }
 
-template<size_t MASK, size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
+inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
+{
+	mem_out[0] = _mm_cvtsi128_si64(tmp);
+
+	tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
+	uint64_t vh = _mm_cvtsi128_si64(tmp);
+
+	uint8_t x = vh >> 24;
+	static const uint16_t table = 0x7531;
+	const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
+	vh ^= ((table >> index) & 0x3) << 28;
+
+	mem_out[1] = vh;
+}
+
+template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
 void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_ctx* ctx0)
 {
+	constexpr size_t MASK = cn_select_mask<ALGO>();
+	constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
+	constexpr size_t MEM = cn_select_memory<ALGO>();
+
+	if(ALGO == cryptonight_monero && len < 43)
+	{
+		memset(output, 0, 32);
+		return;
+	}
+
 	keccak((const uint8_t *)input, len, ctx0->hash_state, 200);
 
+	uint64_t monero_const;
+	if(ALGO == cryptonight_monero)
+	{
+		monero_const  =  *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35);
+		monero_const ^=  *(reinterpret_cast<const uint64_t*>(ctx0->hash_state) + 24);
+	}
+
 	// Optim - 99% time boundary
-	cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx0->hash_state, (__m128i*)ctx0->long_state);
+	cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx0->hash_state, (__m128i*)ctx0->long_state);
 
 	uint8_t* l0 = ctx0->long_state;
 	uint64_t* h0 = (uint64_t*)ctx0->hash_state;
@@ -315,8 +482,13 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
 		else
 			cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
 
-		_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+		if(ALGO == cryptonight_monero)
+			cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+		else
+			_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+
 		idx0 = _mm_cvtsi128_si64(cx);
+
 		if(PREFETCH)
 			_mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0);
 		bx0 = cx;
@@ -333,14 +505,28 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
 		if(PREFETCH)
 			_mm_prefetch((const char*)&l0[al0 & MASK], _MM_HINT_T0);
 		ah0 += lo;
-		((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
+
+		if(ALGO == cryptonight_monero)
+			((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const;
+		else
+			((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
 		ah0 ^= ch;
 
 		idx0 = al0;
+
+		if(ALGO == cryptonight_heavy)
+		{
+			int64_t n  = ((int64_t*)&l0[idx0 & MASK])[0];
+			int32_t d  = ((int32_t*)&l0[idx0 & MASK])[2];
+			int64_t q = n / (d | 0x5);
+
+			((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
+			idx0 = d ^ q;
+		}
 	}
 
 	// Optim - 90% time boundary
-	cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx0->long_state, (__m128i*)ctx0->hash_state);
+	cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx0->long_state, (__m128i*)ctx0->hash_state);
 
 	// Optim - 99% time boundary
 
@@ -351,15 +537,34 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c
 // This lovely creation will do 2 cn hashes at a time. We have plenty of space on silicon
 // to fit temporary vars for two contexts. Function will read len*2 from input and write 64 bytes to output
 // We are still limited by L3 cache, so doubling will only work with CPUs where we have more than 2MB to core (Xeons)
-template<size_t MASK, size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
+template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
 void cryptonight_double_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
 {
+	constexpr size_t MASK = cn_select_mask<ALGO>();
+	constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
+	constexpr size_t MEM = cn_select_memory<ALGO>();
+
+	if(ALGO == cryptonight_monero && len < 43)
+	{
+		memset(output, 0, 64);
+		return;
+	}
+
 	keccak((const uint8_t *)input, len, ctx[0]->hash_state, 200);
 	keccak((const uint8_t *)input+len, len, ctx[1]->hash_state, 200);
 
+	uint64_t monero_const_0, monero_const_1;
+	if(ALGO == cryptonight_monero)
+	{
+		monero_const_0  =  *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35);
+		monero_const_0 ^=  *(reinterpret_cast<const uint64_t*>(ctx[0]->hash_state) + 24);
+		monero_const_1  =  *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + len + 35);
+		monero_const_1 ^=  *(reinterpret_cast<const uint64_t*>(ctx[1]->hash_state) + 24);
+	}
+
 	// Optim - 99% time boundary
-	cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[0]->hash_state, (__m128i*)ctx[0]->long_state);
-	cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[1]->hash_state, (__m128i*)ctx[1]->long_state);
+	cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[0]->hash_state, (__m128i*)ctx[0]->long_state);
+	cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[1]->hash_state, (__m128i*)ctx[1]->long_state);
 
 	uint8_t* l0 = ctx[0]->long_state;
 	uint64_t* h0 = (uint64_t*)ctx[0]->hash_state;
@@ -387,7 +592,11 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 		else
 			cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh0, axl0));
 
-		_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+		if(ALGO == cryptonight_monero)
+			cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+		else
+			_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+
 		idx0 = _mm_cvtsi128_si64(cx);
 		bx0 = cx;
 
@@ -401,7 +610,11 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 		else
 			cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh1, axl1));
 
-		_mm_store_si128((__m128i *)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx));
+		if(ALGO == cryptonight_monero)
+			cryptonight_monero_tweak((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx));
+		else
+			_mm_store_si128((__m128i *)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx));
+
 		idx1 = _mm_cvtsi128_si64(cx);
 		bx1 = cx;
 
@@ -417,11 +630,26 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 		axl0 += hi;
 		axh0 += lo;
 		((uint64_t*)&l0[idx0 & MASK])[0] = axl0;
-		((uint64_t*)&l0[idx0 & MASK])[1] = axh0;
+
+		if(ALGO == cryptonight_monero)
+			((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0;
+		else
+			((uint64_t*)&l0[idx0 & MASK])[1] = axh0;
+
 		axh0 ^= ch;
 		axl0 ^= cl;
 		idx0 = axl0;
 
+		if(ALGO == cryptonight_heavy)
+		{
+			int64_t n  = ((int64_t*)&l0[idx0 & MASK])[0];
+			int32_t d  = ((int32_t*)&l0[idx0 & MASK])[2];
+			int64_t q = n / (d | 0x5);
+
+			((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
+			idx0 = d ^ q;
+		}
+
 		if(PREFETCH)
 			_mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0);
 
@@ -433,18 +661,33 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 		axl1 += hi;
 		axh1 += lo;
 		((uint64_t*)&l1[idx1 & MASK])[0] = axl1;
-		((uint64_t*)&l1[idx1 & MASK])[1] = axh1;
+
+		if(ALGO == cryptonight_monero)
+			((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1;
+		else
+			((uint64_t*)&l1[idx1 & MASK])[1] = axh1;
+
 		axh1 ^= ch;
 		axl1 ^= cl;
 		idx1 = axl1;
 
+		if(ALGO == cryptonight_heavy)
+		{
+			int64_t n  = ((int64_t*)&l1[idx1 & MASK])[0];
+			int32_t d  = ((int32_t*)&l1[idx1 & MASK])[2];
+			int64_t q = n / (d | 0x5);
+
+			((int64_t*)&l1[idx1 & MASK])[0] = n ^ q;
+			idx1 = d ^ q;
+		}
+
 		if(PREFETCH)
 			_mm_prefetch((const char*)&l1[idx1 & MASK], _MM_HINT_T0);
 	}
 
 	// Optim - 90% time boundary
-	cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[0]->long_state, (__m128i*)ctx[0]->hash_state);
-	cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[1]->long_state, (__m128i*)ctx[1]->hash_state);
+	cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[0]->long_state, (__m128i*)ctx[0]->hash_state);
+	cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[1]->long_state, (__m128i*)ctx[1]->hash_state);
 
 	// Optim - 99% time boundary
 
@@ -455,12 +698,10 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 }
 
 #define CN_STEP1(a, b, c, l, ptr, idx)				\
-	a = _mm_xor_si128(a, c);				\
-	idx = _mm_cvtsi128_si64(a);				\
 	ptr = (__m128i *)&l[idx & MASK];			\
 	if(PREFETCH)						\
 		_mm_prefetch((const char*)ptr, _MM_HINT_T0);	\
-	c = _mm_load_si128(ptr)
+	c = _mm_load_si128(ptr); 
 
 #define CN_STEP2(a, b, c, l, ptr, idx)				\
 	if(SOFT_AES)						\
@@ -468,30 +709,64 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto
 	else							\
 		c = _mm_aesenc_si128(c, a);			\
 	b = _mm_xor_si128(b, c);				\
-	_mm_store_si128(ptr, b)
+	if(ALGO == cryptonight_monero) \
+		cryptonight_monero_tweak((uint64_t*)ptr, b); \
+	else \
+		_mm_store_si128(ptr, b);\
 
 #define CN_STEP3(a, b, c, l, ptr, idx)				\
 	idx = _mm_cvtsi128_si64(c);				\
 	ptr = (__m128i *)&l[idx & MASK];			\
 	if(PREFETCH)						\
 		_mm_prefetch((const char*)ptr, _MM_HINT_T0);	\
-	b = _mm_load_si128(ptr)
+	b = _mm_load_si128(ptr);
 
-#define CN_STEP4(a, b, c, l, ptr, idx)				\
+#define CN_STEP4(a, b, c, l, mc, ptr, idx)				\
 	lo = _umul128(idx, _mm_cvtsi128_si64(b), &hi);		\
 	a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi));		\
-	_mm_store_si128(ptr, a)
+	if(ALGO == cryptonight_monero) \
+		_mm_store_si128(ptr, _mm_xor_si128(a, mc)); \
+	else \
+		_mm_store_si128(ptr, a);\
+	a = _mm_xor_si128(a, b); \
+	idx = _mm_cvtsi128_si64(a);	\
+	if(ALGO == cryptonight_heavy) \
+	{ \
+		int64_t n  = ((int64_t*)&l[idx & MASK])[0]; \
+		int32_t d  = ((int32_t*)&l[idx & MASK])[2]; \
+		int64_t q = n / (d | 0x5); \
+		((int64_t*)&l[idx & MASK])[0] = n ^ q; \
+		idx = d ^ q; \
+	}
+
+#define CONST_INIT(ctx, n) \
+	__m128i mc##n = _mm_set_epi64x(*reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + n * len + 35) ^ \
+	*(reinterpret_cast<const uint64_t*>((ctx)->hash_state) + 24), 0);
 
 // This lovelier creation will do 3 cn hashes at a time.
-template<size_t MASK, size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
+template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
 void cryptonight_triple_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
 {
+	constexpr size_t MASK = cn_select_mask<ALGO>();
+	constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
+	constexpr size_t MEM = cn_select_memory<ALGO>();
+
+	if(ALGO == cryptonight_monero && len < 43)
+	{
+		memset(output, 0, 32 * 3);
+		return;
+	}
+
 	for (size_t i = 0; i < 3; i++)
 	{
 		keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200);
-		cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
+		cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
 	}
 
+	CONST_INIT(ctx[0], 0);
+	CONST_INIT(ctx[1], 1);
+	CONST_INIT(ctx[2], 2);
+
 	uint8_t* l0 = ctx[0]->long_state;
 	uint64_t* h0 = (uint64_t*)ctx[0]->hash_state;
 	uint8_t* l1 = ctx[1]->long_state;
@@ -509,9 +784,14 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto
 	__m128i cx1 = _mm_set_epi64x(0, 0);
 	__m128i cx2 = _mm_set_epi64x(0, 0);
 
+	uint64_t idx0, idx1, idx2;
+	idx0 = _mm_cvtsi128_si64(ax0);
+	idx1 = _mm_cvtsi128_si64(ax1);
+	idx2 = _mm_cvtsi128_si64(ax2);
+
 	for (size_t i = 0; i < ITERATIONS/2; i++)
 	{
-		uint64_t idx0, idx1, idx2, hi, lo;
+		uint64_t hi, lo;
 		__m128i *ptr0, *ptr1, *ptr2;
 
 		// EVEN ROUND
@@ -527,9 +807,9 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto
 		CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
 		CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
 
-		CN_STEP4(ax0, bx0, cx0, l0, ptr0, idx0);
-		CN_STEP4(ax1, bx1, cx1, l1, ptr1, idx1);
-		CN_STEP4(ax2, bx2, cx2, l2, ptr2, idx2);
+		CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
+		CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
+		CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
 
 		// ODD ROUND
 		CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
@@ -544,29 +824,44 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto
 		CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
 		CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
 
-		CN_STEP4(ax0, cx0, bx0, l0, ptr0, idx0);
-		CN_STEP4(ax1, cx1, bx1, l1, ptr1, idx1);
-		CN_STEP4(ax2, cx2, bx2, l2, ptr2, idx2);
+		CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
+		CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
+		CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
 	}
 
 	for (size_t i = 0; i < 3; i++)
 	{
-		cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
+		cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
 		keccakf((uint64_t*)ctx[i]->hash_state, 24);
 		extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i);
 	}
 }
 
 // This even lovelier creation will do 4 cn hashes at a time.
-template<size_t MASK, size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
+template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
 void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
 {
+	constexpr size_t MASK = cn_select_mask<ALGO>();
+	constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
+	constexpr size_t MEM = cn_select_memory<ALGO>();
+
+	if(ALGO == cryptonight_monero && len < 43)
+	{
+		memset(output, 0, 32 * 4);
+		return;
+	}
+
 	for (size_t i = 0; i < 4; i++)
 	{
 		keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200);
-		cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
+		cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
 	}
 
+	CONST_INIT(ctx[0], 0);
+	CONST_INIT(ctx[1], 1);
+	CONST_INIT(ctx[2], 2);
+	CONST_INIT(ctx[3], 3);
+
 	uint8_t* l0 = ctx[0]->long_state;
 	uint64_t* h0 = (uint64_t*)ctx[0]->hash_state;
 	uint8_t* l1 = ctx[1]->long_state;
@@ -588,10 +883,16 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni
 	__m128i cx1 = _mm_set_epi64x(0, 0);
 	__m128i cx2 = _mm_set_epi64x(0, 0);
 	__m128i cx3 = _mm_set_epi64x(0, 0);
-
+	
+	uint64_t idx0, idx1, idx2, idx3;
+	idx0 = _mm_cvtsi128_si64(ax0);
+	idx1 = _mm_cvtsi128_si64(ax1);
+	idx2 = _mm_cvtsi128_si64(ax2);
+	idx3 = _mm_cvtsi128_si64(ax3);
+	
 	for (size_t i = 0; i < ITERATIONS/2; i++)
 	{
-		uint64_t idx0, idx1, idx2, idx3, hi, lo;
+		uint64_t hi, lo;
 		__m128i *ptr0, *ptr1, *ptr2, *ptr3;
 
 		// EVEN ROUND
@@ -610,10 +911,10 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni
 		CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
 		CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3);
 
-		CN_STEP4(ax0, bx0, cx0, l0, ptr0, idx0);
-		CN_STEP4(ax1, bx1, cx1, l1, ptr1, idx1);
-		CN_STEP4(ax2, bx2, cx2, l2, ptr2, idx2);
-		CN_STEP4(ax3, bx3, cx3, l3, ptr3, idx3);
+		CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
+		CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
+		CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
+		CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
 
 		// ODD ROUND
 		CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
@@ -631,30 +932,46 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni
 		CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
 		CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3);
 
-		CN_STEP4(ax0, cx0, bx0, l0, ptr0, idx0);
-		CN_STEP4(ax1, cx1, bx1, l1, ptr1, idx1);
-		CN_STEP4(ax2, cx2, bx2, l2, ptr2, idx2);
-		CN_STEP4(ax3, cx3, bx3, l3, ptr3, idx3);
+		CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
+		CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
+		CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
+		CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
 	}
 
 	for (size_t i = 0; i < 4; i++)
 	{
-		cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
+		cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
 		keccakf((uint64_t*)ctx[i]->hash_state, 24);
 		extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i);
 	}
 }
 
 // This most lovely creation will do 5 cn hashes at a time.
-template<size_t MASK, size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH>
+template<xmrstak_algo ALGO, bool SOFT_AES, bool PREFETCH>
 void cryptonight_penta_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
 {
+	constexpr size_t MASK = cn_select_mask<ALGO>();
+	constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
+	constexpr size_t MEM = cn_select_memory<ALGO>();
+
+	if(ALGO == cryptonight_monero && len < 43)
+	{
+		memset(output, 0, 32 * 5);
+		return;
+	}
+
 	for (size_t i = 0; i < 5; i++)
 	{
 		keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200);
-		cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
+		cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
 	}
 
+	CONST_INIT(ctx[0], 0);
+	CONST_INIT(ctx[1], 1);
+	CONST_INIT(ctx[2], 2);
+	CONST_INIT(ctx[3], 3);
+	CONST_INIT(ctx[4], 4);
+
 	uint8_t* l0 = ctx[0]->long_state;
 	uint64_t* h0 = (uint64_t*)ctx[0]->hash_state;
 	uint8_t* l1 = ctx[1]->long_state;
@@ -682,9 +999,16 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton
 	__m128i cx3 = _mm_set_epi64x(0, 0);
 	__m128i cx4 = _mm_set_epi64x(0, 0);
 
+	uint64_t idx0, idx1, idx2, idx3, idx4;
+	idx0 = _mm_cvtsi128_si64(ax0);
+	idx1 = _mm_cvtsi128_si64(ax1);
+	idx2 = _mm_cvtsi128_si64(ax2);
+	idx3 = _mm_cvtsi128_si64(ax3);
+	idx4 = _mm_cvtsi128_si64(ax4);
+
 	for (size_t i = 0; i < ITERATIONS/2; i++)
 	{
-		uint64_t idx0, idx1, idx2, idx3, idx4, hi, lo;
+		uint64_t hi, lo;
 		__m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4;
 
 		// EVEN ROUND
@@ -706,11 +1030,11 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton
 		CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3);
 		CN_STEP3(ax4, bx4, cx4, l4, ptr4, idx4);
 
-		CN_STEP4(ax0, bx0, cx0, l0, ptr0, idx0);
-		CN_STEP4(ax1, bx1, cx1, l1, ptr1, idx1);
-		CN_STEP4(ax2, bx2, cx2, l2, ptr2, idx2);
-		CN_STEP4(ax3, bx3, cx3, l3, ptr3, idx3);
-		CN_STEP4(ax4, bx4, cx4, l4, ptr4, idx4);
+		CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
+		CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
+		CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
+		CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
+		CN_STEP4(ax4, bx4, cx4, l4, mc4, ptr4, idx4);
 
 		// ODD ROUND
 		CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
@@ -731,16 +1055,16 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton
 		CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3);
 		CN_STEP3(ax4, cx4, bx4, l4, ptr4, idx4);
 
-		CN_STEP4(ax0, cx0, bx0, l0, ptr0, idx0);
-		CN_STEP4(ax1, cx1, bx1, l1, ptr1, idx1);
-		CN_STEP4(ax2, cx2, bx2, l2, ptr2, idx2);
-		CN_STEP4(ax3, cx3, bx3, l3, ptr3, idx3);
-		CN_STEP4(ax4, cx4, bx4, l4, ptr4, idx4);
+		CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
+		CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
+		CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
+		CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
+		CN_STEP4(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
 	}
 
 	for (size_t i = 0; i < 5; i++)
 	{
-		cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
+		cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
 		keccakf((uint64_t*)ctx[i]->hash_state, 24);
 		extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i);
 	}
diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp
index 1026b04dcf784940d6a14c2c82327f289e1247d8..17fa24b0648d9e0be72036847958f827b7ba97d9 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp
+++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp
@@ -28,9 +28,9 @@ extern "C"
 #include "c_jh.h"
 #include "c_skein.h"
 }
+#include "xmrstak/backend/cryptonight.hpp"
 #include "cryptonight.h"
 #include "cryptonight_aesni.h"
-#include "xmrstak/backend/cryptonight.hpp"
 #include "xmrstak/misc/console.hpp"
 #include "xmrstak/jconf.hpp"
 #include <stdio.h>
@@ -202,15 +202,8 @@ size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg)
 
 cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg)
 {
-	size_t hashMemSize;
-	if(::jconf::inst()->IsCurrencyMonero())
-	{
-		hashMemSize = MONERO_MEMORY;
-	}
-	else
-	{
-		hashMemSize = AEON_MEMORY;
-	}
+	size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
+
 	cryptonight_ctx* ptr = (cryptonight_ctx*)_mm_malloc(sizeof(cryptonight_ctx), 4096);
 
 	if(use_fast_mem == 0)
@@ -285,15 +278,8 @@ cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, al
 
 void cryptonight_free_ctx(cryptonight_ctx* ctx)
 {
-	size_t hashMemSize;
-	if(::jconf::inst()->IsCurrencyMonero())
-	{
-		hashMemSize = MONERO_MEMORY;
-	}
-	else
-	{
-		hashMemSize = AEON_MEMORY;
-	}
+	size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
+
 	if(ctx->ctx_info[0] != 0)
 	{
 #ifdef _WIN32
diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp
index cef4f8e306a24f1a1918ae22d8b9ef3d0014c625..e263aca41e7a217f7b242fed3b776edc6cdfb8c4 100644
--- a/xmrstak/backend/cpu/minethd.cpp
+++ b/xmrstak/backend/cpu/minethd.cpp
@@ -231,45 +231,44 @@ bool minethd::self_test()
 
 	bool bResult = true;
 
-	bool mineMonero = ::jconf::inst()->IsCurrencyMonero();
-	if(mineMonero)
+	if(::jconf::inst()->GetMiningAlgo() == cryptonight)
 	{
 		unsigned char out[32 * MAX_N];
 		cn_hash_fun hashf;
 		cn_hash_fun_multi hashf_multi;
 
-		hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, mineMonero);
+		hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
 		hashf("This is a test", 14, out, ctx[0]);
 		bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;
 
-		hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, mineMonero);
+		hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight);
 		hashf("This is a test", 14, out, ctx[0]);
 		bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;
 
-		hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), false, mineMonero);
+		hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
 		hashf_multi("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx);
 		bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
 				"\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0;
 
-		hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), true, mineMonero);
+		hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight);
 		hashf_multi("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx);
 		bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
 				"\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0;
 
-		hashf_multi = func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), false, mineMonero);
+		hashf_multi = func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
 		hashf_multi("This is a testThis is a testThis is a test", 14, out, ctx);
 		bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
 				"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
 				"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 96) == 0;
 
-		hashf_multi = func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), false, mineMonero);
+		hashf_multi = func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
 		hashf_multi("This is a testThis is a testThis is a testThis is a test", 14, out, ctx);
 		bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
 				"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
 				"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
 				"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 128) == 0;
 
-		hashf_multi = func_multi_selector(5, ::jconf::inst()->HaveHardwareAes(), false, mineMonero);
+		hashf_multi = func_multi_selector(5, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight);
 		hashf_multi("This is a testThis is a testThis is a testThis is a testThis is a test", 14, out, ctx);
 		bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
 				"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
@@ -277,6 +276,12 @@ bool minethd::self_test()
 				"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05"
 				"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 160) == 0;
 	}
+	else if(::jconf::inst()->GetMiningAlgo() == cryptonight_lite)
+	{
+	}
+	else if(::jconf::inst()->GetMiningAlgo() == cryptonight_monero)
+	{
+	}
 
 	for (int i = 0; i < MAX_N; i++)
 		cryptonight_free_ctx(ctx[i]);
@@ -340,48 +345,56 @@ void minethd::consume_work()
 	globalStates::inst().inst().iConsumeCnt++;
 }
 
-minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, bool mineMonero)
+minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo)
 {
 	// We have two independent flag bits in the functions
 	// therefore we will build a binary digit and select the
 	// function as a two digit binary
-	// Digit order SOFT_AES, NO_PREFETCH, MINER_ALGO
+
+	uint8_t algv;
+	switch(algo)
+	{
+	case cryptonight:
+		algv = 2;
+		break;
+	case cryptonight_lite:
+		algv = 1;
+		break;
+	case cryptonight_monero:
+		algv = 0;
+		break;
+	case cryptonight_heavy:
+		algv = 3;
+		break;
+	default:
+		algv = 2;
+		break;
+	}
 
 	static const cn_hash_fun func_table[] = {
-		/* there will be 8 function entries if `CONF_NO_MONERO` and `CONF_NO_AEON`
-		 * is not defined. If one is defined there will be 4 entries.
-		 */
-#ifndef CONF_NO_MONERO
-		cryptonight_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, false>,
-		cryptonight_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, true>,
-		cryptonight_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, false>,
-		cryptonight_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, true>
-#endif
-#if (!defined(CONF_NO_AEON)) && (!defined(CONF_NO_MONERO))
-		// comma will be added only if Monero and Aeon is build
-		,
-#endif
-#ifndef CONF_NO_AEON
-		cryptonight_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, false>,
-		cryptonight_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, true>,
-		cryptonight_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, false>,
-		cryptonight_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, true>
-#endif
+		cryptonight_hash<cryptonight_monero, false, false>,
+		cryptonight_hash<cryptonight_monero, true, false>,
+		cryptonight_hash<cryptonight_monero, false, true>,
+		cryptonight_hash<cryptonight_monero, true, true>,
+		cryptonight_hash<cryptonight_lite, false, false>,
+		cryptonight_hash<cryptonight_lite, true, false>,
+		cryptonight_hash<cryptonight_lite, false, true>,
+		cryptonight_hash<cryptonight_lite, true, true>,
+		cryptonight_hash<cryptonight, false, false>,
+		cryptonight_hash<cryptonight, true, false>,
+		cryptonight_hash<cryptonight, false, true>,
+		cryptonight_hash<cryptonight, true, true>,
+		cryptonight_hash<cryptonight_heavy, false, false>,
+		cryptonight_hash<cryptonight_heavy, true, false>,
+		cryptonight_hash<cryptonight_heavy, false, true>,
+		cryptonight_hash<cryptonight_heavy, true, true>
 	};
 
-	std::bitset<3> digit;
-	digit.set(0, !bNoPrefetch);
-	digit.set(1, !bHaveAes);
-
-	// define aeon settings
-#if defined(CONF_NO_AEON) || defined(CONF_NO_MONERO)
-	// ignore 3rd bit if only one currency is active
-	digit.set(2, 0);
-#else
-	digit.set(2, !mineMonero);
-#endif
+	std::bitset<2> digit;
+	digit.set(0, !bHaveAes);
+	digit.set(1, !bNoPrefetch);
 
-	return func_table[digit.to_ulong()];
+	return func_table[ algv << 2 | digit.to_ulong() ];
 }
 
 void minethd::work_main()
@@ -401,7 +414,7 @@ void minethd::work_main()
 	uint32_t* piNonce;
 	job_result result;
 
-	hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->IsCurrencyMonero());
+	hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->GetMiningAlgo());
 	ctx = minethd_alloc_ctx();
 
 	piHashVal = (uint64_t*)(result.bResult + 24);
@@ -434,6 +447,22 @@ void minethd::work_main()
 		if(oWork.bNiceHash)
 			result.iNonce = *piNonce;
 
+		if(::jconf::inst()->GetMiningAlgo() == cryptonight_monero)
+		{
+			if(oWork.bWorkBlob[0] >= 7)
+				hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight_monero);
+			else
+				hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight);
+		}
+
+		if(::jconf::inst()->GetMiningAlgo() == cryptonight_heavy)
+		{
+			if(oWork.bWorkBlob[0] >= 3)
+				hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight_heavy);
+			else
+				hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight);
+		}
+
 		while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 		{
 			if ((iCount++ & 0xF) == 0) //Store stats every 16 hashes
@@ -465,93 +494,105 @@ void minethd::work_main()
 	cryptonight_free_ctx(ctx);
 }
 
-minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, bool bNoPrefetch, bool mineMonero)
+minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo)
 {
 	// We have two independent flag bits in the functions
 	// therefore we will build a binary digit and select the
 	// function as a two digit binary
-	// Digit order SOFT_AES, NO_PREFETCH
+
+	uint8_t algv;
+	switch(algo)
+	{
+	case cryptonight:
+		algv = 2;
+		break;
+	case cryptonight_lite:
+		algv = 1;
+		break;
+	case cryptonight_monero:
+		algv = 0;
+		break;
+	default:
+		algv = 2;
+		break;
+	}
 
 	static const cn_hash_fun_multi func_table[] = {
-		/* there will be 8*(MAX_N-1) function entries if `CONF_NO_MONERO` and `CONF_NO_AEON`
-		 * is not defined. If one is defined there will be 4*(MAX_N-1) entries.
-		 */
-#ifndef CONF_NO_MONERO
-		cryptonight_double_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, false>,
-		cryptonight_double_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, true>,
-		cryptonight_double_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, false>,
-		cryptonight_double_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, true>,
-		cryptonight_triple_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, false>,
-		cryptonight_triple_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, true>,
-		cryptonight_triple_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, false>,
-		cryptonight_triple_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, true>,
-		cryptonight_quad_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, false>,
-		cryptonight_quad_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, true>,
-		cryptonight_quad_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, false>,
-		cryptonight_quad_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, true>,
-		cryptonight_penta_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, false>,
-		cryptonight_penta_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, false, true>,
-		cryptonight_penta_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, false>,
-		cryptonight_penta_hash<MONERO_MASK, MONERO_ITER, MONERO_MEMORY, true, true>
-#endif
-#if (!defined(CONF_NO_AEON)) && (!defined(CONF_NO_MONERO))
-		// comma will be added only if Monero and Aeon is build
-		,
-#endif
-#ifndef CONF_NO_AEON
-		cryptonight_double_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, false>,
-		cryptonight_double_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, true>,
-		cryptonight_double_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, false>,
-		cryptonight_double_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, true>,
-		cryptonight_triple_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, false>,
-		cryptonight_triple_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, true>,
-		cryptonight_triple_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, false>,
-		cryptonight_triple_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, true>,
-		cryptonight_quad_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, false>,
-		cryptonight_quad_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, true>,
-		cryptonight_quad_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, false>,
-		cryptonight_quad_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, true>,
-		cryptonight_penta_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, false>,
-		cryptonight_penta_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, false, true>,
-		cryptonight_penta_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, false>,
-		cryptonight_penta_hash<AEON_MASK, AEON_ITER, AEON_MEMORY, true, true>
-#endif
+		cryptonight_double_hash<cryptonight_monero, false, false>,
+		cryptonight_double_hash<cryptonight_monero, true, false>,
+		cryptonight_double_hash<cryptonight_monero, false, true>,
+		cryptonight_double_hash<cryptonight_monero, true, true>,
+		cryptonight_triple_hash<cryptonight_monero, false, false>,
+		cryptonight_triple_hash<cryptonight_monero, true, false>,
+		cryptonight_triple_hash<cryptonight_monero, false, true>,
+		cryptonight_triple_hash<cryptonight_monero, true, true>,
+		cryptonight_quad_hash<cryptonight_monero, false, false>,
+		cryptonight_quad_hash<cryptonight_monero, true, false>,
+		cryptonight_quad_hash<cryptonight_monero, false, true>,
+		cryptonight_quad_hash<cryptonight_monero, true, true>,
+		cryptonight_penta_hash<cryptonight_monero, false, false>,
+		cryptonight_penta_hash<cryptonight_monero, true, false>,
+		cryptonight_penta_hash<cryptonight_monero, false, true>,
+		cryptonight_penta_hash<cryptonight_monero, true, true>,
+		cryptonight_double_hash<cryptonight_lite, false, false>,
+		cryptonight_double_hash<cryptonight_lite, true, false>,
+		cryptonight_double_hash<cryptonight_lite, false, true>,
+		cryptonight_double_hash<cryptonight_lite, true, true>,
+		cryptonight_triple_hash<cryptonight_lite, false, false>,
+		cryptonight_triple_hash<cryptonight_lite, true, false>,
+		cryptonight_triple_hash<cryptonight_lite, false, true>,
+		cryptonight_triple_hash<cryptonight_lite, true, true>,
+		cryptonight_quad_hash<cryptonight_lite, false, false>,
+		cryptonight_quad_hash<cryptonight_lite, true, false>,
+		cryptonight_quad_hash<cryptonight_lite, false, true>,
+		cryptonight_quad_hash<cryptonight_lite, true, true>,
+		cryptonight_penta_hash<cryptonight_lite, false, false>,
+		cryptonight_penta_hash<cryptonight_lite, true, false>,
+		cryptonight_penta_hash<cryptonight_lite, false, true>,
+		cryptonight_penta_hash<cryptonight_lite, true, true>,
+		cryptonight_double_hash<cryptonight, false, false>,
+		cryptonight_double_hash<cryptonight, true, false>,
+		cryptonight_double_hash<cryptonight, false, true>,
+		cryptonight_double_hash<cryptonight, true, true>,
+		cryptonight_triple_hash<cryptonight, false, false>,
+		cryptonight_triple_hash<cryptonight, true, false>,
+		cryptonight_triple_hash<cryptonight, false, true>,
+		cryptonight_triple_hash<cryptonight, true, true>,
+		cryptonight_quad_hash<cryptonight, false, false>,
+		cryptonight_quad_hash<cryptonight, true, false>,
+		cryptonight_quad_hash<cryptonight, false, true>,
+		cryptonight_quad_hash<cryptonight, true, true>,
+		cryptonight_penta_hash<cryptonight, false, false>,
+		cryptonight_penta_hash<cryptonight, true, false>,
+		cryptonight_penta_hash<cryptonight, false, true>,
+		cryptonight_penta_hash<cryptonight, true, true>
 	};
 
 	std::bitset<2> digit;
-	digit.set(0, !bNoPrefetch);
-	digit.set(1, !bHaveAes);
-
-	// define aeon settings
-#if defined(CONF_NO_AEON) || defined(CONF_NO_MONERO)
-	// ignore miner algo if only one currency is active
-	size_t miner_algo_base = 0;
-#else
-	size_t miner_algo_base = mineMonero ? 0 : 4*(MAX_N-1);
-#endif
-
-	N = (N<2) ? 2 : (N>MAX_N) ? MAX_N : N;
-	return func_table[miner_algo_base + 4*(N-2) + digit.to_ulong()];
+	digit.set(0, !bHaveAes);
+	digit.set(1, !bNoPrefetch);
+	
+	return func_table[algv << 4 | (N-2) << 2 | digit.to_ulong()];
 }
 
 void minethd::double_work_main()
 {
-	multiway_work_main<2>(func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->IsCurrencyMonero()));
+	multiway_work_main<2>(func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->GetMiningAlgo()));
 }
 
 void minethd::triple_work_main()
 {
-	multiway_work_main<3>(func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->IsCurrencyMonero()));
+	multiway_work_main<3>(func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->GetMiningAlgo()));
 }
 
 void minethd::quad_work_main()
 {
-	multiway_work_main<4>(func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->IsCurrencyMonero()));
+	multiway_work_main<4>(func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->GetMiningAlgo()));
 }
 
 void minethd::penta_work_main()
 {
-	multiway_work_main<5>(func_multi_selector(5, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->IsCurrencyMonero()));
+	multiway_work_main<5>(func_multi_selector(5, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, ::jconf::inst()->GetMiningAlgo()));
 }
 
 template<size_t N>
@@ -621,6 +662,22 @@ void minethd::multiway_work_main(cn_hash_fun_multi hash_fun_multi)
 		if(oWork.bNiceHash)
 			iNonce = *piNonce[0];
 
+		if(::jconf::inst()->GetMiningAlgo() == cryptonight_monero)
+		{
+			if(oWork.bWorkBlob[0] >= 7)
+				hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight_monero);
+			else
+				hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight);
+		}
+
+		if(::jconf::inst()->GetMiningAlgo() == cryptonight_heavy)
+		{
+			if(oWork.bWorkBlob[0] >= 3)
+				hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight_heavy);
+			else
+				hash_fun_multi = func_multi_selector(N, ::jconf::inst()->HaveHardwareAes(), bNoPrefetch, cryptonight);
+		}
+
 		while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 		{
 			if ((iCount++ & 0x7) == 0)  //Store stats every 8*N hashes
diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp
index 0433d0d369db4cd2caac9e355750ba936d4b0a9c..ef1bbd2d1f83e571acb64d24d6425c818233f545 100644
--- a/xmrstak/backend/cpu/minethd.hpp
+++ b/xmrstak/backend/cpu/minethd.hpp
@@ -1,5 +1,6 @@
 #pragma once
 
+#include "xmrstak/jconf.hpp"
 #include "crypto/cryptonight.h"
 #include "xmrstak/backend/miner_work.hpp"
 #include "xmrstak/backend/iBackend.hpp"
@@ -23,14 +24,14 @@ public:
 
 	typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*);
 
-	static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch, bool mineMonero);
+	static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo);
 	static bool thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id);
 
 	static cryptonight_ctx* minethd_alloc_ctx();
 
 private:
 	typedef void (*cn_hash_fun_multi)(const void*, size_t, void*, cryptonight_ctx**);
-	static cn_hash_fun_multi func_multi_selector(size_t N, bool bHaveAes, bool bNoPrefetch, bool mineMonero);
+	static cn_hash_fun_multi func_multi_selector(size_t N, bool bHaveAes, bool bNoPrefetch, xmrstak_algo algo);
 
 	minethd(miner_work& pWork, size_t iNo, int iMultiway, bool no_prefetch, int64_t affinity);
 
diff --git a/xmrstak/backend/cryptonight.hpp b/xmrstak/backend/cryptonight.hpp
index 0ef5ae70d0db9c1bdef49c97cec8885f32fe5475..fe10a9f92d1516b2aeee0604e6f853e53bef2932 100644
--- a/xmrstak/backend/cryptonight.hpp
+++ b/xmrstak/backend/cryptonight.hpp
@@ -1,12 +1,123 @@
 #pragma once
+#include <stddef.h>
+#include <inttypes.h>
+#include <type_traits>
+
+enum xmrstak_algo 
+{
+	invalid_algo = 0,
+	cryptonight = 1,
+	cryptonight_lite = 2,
+	cryptonight_monero = 3,
+	cryptonight_heavy = 4
+};
 
 // define aeon settings
-#define AEON_MEMORY 1048576llu
-#define AEON_MASK 0xFFFF0
-#define AEON_ITER 0x40000
+constexpr size_t CRYPTONIGHT_LITE_MEMORY = 1 * 1024 * 1024;
+constexpr uint32_t CRYPTONIGHT_LITE_MASK = 0xFFFF0;
+constexpr uint32_t CRYPTONIGHT_LITE_ITER = 0x40000;
+
+constexpr size_t CRYPTONIGHT_MEMORY = 2 * 1024 * 1024;
+constexpr uint32_t CRYPTONIGHT_MASK = 0x1FFFF0;
+constexpr uint32_t CRYPTONIGHT_ITER = 0x80000;
+
+constexpr size_t CRYPTONIGHT_HEAVY_MEMORY = 4 * 1024 * 1024;
+constexpr uint32_t CRYPTONIGHT_HEAVY_MASK = 0x3FFFF0;
+constexpr uint32_t CRYPTONIGHT_HEAVY_ITER = 0x40000;
+
+template<xmrstak_algo ALGO>
+inline constexpr size_t cn_select_memory() { return 0; }
+
+template<>
+inline constexpr size_t cn_select_memory<cryptonight>() { return CRYPTONIGHT_MEMORY; }
+
+template<>
+inline constexpr size_t cn_select_memory<cryptonight_lite>() { return CRYPTONIGHT_LITE_MEMORY; }
+
+template<>
+inline constexpr size_t cn_select_memory<cryptonight_monero>() { return CRYPTONIGHT_MEMORY; }
+
+template<>
+inline constexpr size_t cn_select_memory<cryptonight_heavy>() { return CRYPTONIGHT_HEAVY_MEMORY; }
+
+
+inline size_t cn_select_memory(xmrstak_algo algo)
+{
+	switch(algo)
+	{
+	case cryptonight:
+		return CRYPTONIGHT_MEMORY;
+	case cryptonight_lite:
+		return CRYPTONIGHT_LITE_MEMORY;
+	case cryptonight_monero:
+		return CRYPTONIGHT_MEMORY;
+	case cryptonight_heavy:
+		return CRYPTONIGHT_HEAVY_MEMORY;
+	default:
+		return 0;
+	}
+}
+
+template<xmrstak_algo ALGO>
+inline constexpr uint32_t cn_select_mask() { return 0; }
+
+template<>
+inline constexpr uint32_t cn_select_mask<cryptonight>() { return CRYPTONIGHT_MASK; }
+
+template<>
+inline constexpr uint32_t cn_select_mask<cryptonight_lite>() { return CRYPTONIGHT_LITE_MASK; }
+
+template<>
+inline constexpr uint32_t cn_select_mask<cryptonight_monero>() { return CRYPTONIGHT_MASK; }
+
+template<>
+inline constexpr uint32_t cn_select_mask<cryptonight_heavy>() { return CRYPTONIGHT_HEAVY_MASK; }
+
+inline size_t cn_select_mask(xmrstak_algo algo)
+{
+	switch(algo)
+	{
+	case cryptonight:
+		return CRYPTONIGHT_MASK;
+	case cryptonight_lite:
+		return CRYPTONIGHT_LITE_MASK;
+	case cryptonight_monero:
+		return CRYPTONIGHT_MASK;
+	case cryptonight_heavy:
+		return CRYPTONIGHT_HEAVY_MASK;
+	default:
+		return 0;
+	}
+}
+
+template<xmrstak_algo ALGO>
+inline constexpr uint32_t cn_select_iter() { return 0; }
+
+template<>
+inline constexpr uint32_t cn_select_iter<cryptonight>() { return CRYPTONIGHT_ITER; }
+
+template<>
+inline constexpr uint32_t cn_select_iter<cryptonight_lite>() { return CRYPTONIGHT_LITE_ITER; }
+
+template<>
+inline constexpr uint32_t cn_select_iter<cryptonight_monero>() { return CRYPTONIGHT_ITER; }
 
-// define xmr settings
-#define MONERO_MEMORY 2097152llu
-#define MONERO_MASK 0x1FFFF0
-#define MONERO_ITER 0x80000
+template<>
+inline constexpr uint32_t cn_select_iter<cryptonight_heavy>() { return CRYPTONIGHT_HEAVY_ITER; }
 
+inline size_t cn_select_iter(xmrstak_algo algo)
+{
+	switch(algo)
+	{
+	case cryptonight:
+		return CRYPTONIGHT_ITER;
+	case cryptonight_lite:
+		return CRYPTONIGHT_LITE_ITER;
+	case cryptonight_monero:
+		return CRYPTONIGHT_ITER;
+	case cryptonight_heavy:
+		return CRYPTONIGHT_HEAVY_ITER;
+	default:
+		return 0;
+	}
+}
diff --git a/xmrstak/backend/miner_work.hpp b/xmrstak/backend/miner_work.hpp
index 4bfe429f97b49ba4a61fe07d03698b6291a2fd25..9e5a4e46408120f2828b09e25706af8ae8ed9dfa 100644
--- a/xmrstak/backend/miner_work.hpp
+++ b/xmrstak/backend/miner_work.hpp
@@ -74,5 +74,11 @@ namespace xmrstak
 
 			return *this;
 		}
+
+		uint8_t getVersion() const
+		{
+			return bWorkBlob[0];
+		}
+
 	};
 } // namepsace xmrstak
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp
index 867a998934e021e29a5d30b864cb19b895450065..153e4e33e339710e90d3c34b517b345a3e83f8dd 100644
--- a/xmrstak/backend/nvidia/minethd.cpp
+++ b/xmrstak/backend/nvidia/minethd.cpp
@@ -237,12 +237,20 @@ void minethd::work_main()
 	uint64_t iCount = 0;
 	cryptonight_ctx* cpu_ctx;
 	cpu_ctx = cpu::minethd::minethd_alloc_ctx();
-	cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, ::jconf::inst()->IsCurrencyMonero());
+	auto miner_algo = ::jconf::inst()->GetMiningAlgo();
+	cn_hash_fun hash_fun;
+	if(miner_algo == cryptonight_monero || miner_algo == cryptonight_heavy)
+	{
+		// start with cryptonight and switch later if fork version is reached
+		hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight);
+	}
+	else
+		hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
 	uint32_t iNonce;
 
 	globalStates::inst().iConsumeCnt++;
 
-	bool mineMonero = strcmp_i(::jconf::inst()->GetCurrency(), "monero");
+	uint8_t version = 0;
 
 	while (bQuit == 0)
 	{
@@ -257,6 +265,16 @@ void minethd::work_main()
 				std::this_thread::sleep_for(std::chrono::milliseconds(100));
 
 			consume_work();
+			uint8_t new_version = oWork.getVersion();
+			if(miner_algo == cryptonight_monero && version < 7 && new_version >= 7)
+			{
+				hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight_monero);
+			}
+			else if(miner_algo == cryptonight_heavy && version < 3 && new_version >= 3)
+			{
+				hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight_heavy);
+			}
+			version = new_version;
 			continue;
 		}
 
@@ -281,11 +299,11 @@ void minethd::work_main()
 			uint32_t foundNonce[10];
 			uint32_t foundCount;
 
-			cryptonight_extra_cpu_prepare(&ctx, iNonce);
+			cryptonight_extra_cpu_prepare(&ctx, iNonce, miner_algo, version);
 
-			cryptonight_core_cpu_hash(&ctx, mineMonero);
+			cryptonight_core_cpu_hash(&ctx, miner_algo, iNonce, version);
 
-			cryptonight_extra_cpu_final(&ctx, iNonce, oWork.iTarget, &foundCount, foundNonce);
+			cryptonight_extra_cpu_final(&ctx, iNonce, oWork.iTarget, &foundCount, foundNonce, miner_algo, version);
 
 			for(size_t i = 0; i < foundCount; i++)
 			{
@@ -316,6 +334,16 @@ void minethd::work_main()
 		}
 
 		consume_work();
+		uint8_t new_version = oWork.getVersion();
+		if(miner_algo == cryptonight_monero && version < 7 && new_version >= 7)
+		{
+			hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight_monero);
+		}
+		else if(miner_algo == cryptonight_heavy && version < 3 && new_version >= 3)
+		{
+			hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, cryptonight_heavy);
+		}
+		version = new_version;
 	}
 }
 
diff --git a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
index afbdbaf8823899267eb6467a41d2b570c0e6f282..29a35236ef72c183938789fd78437e6981fef35c 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
@@ -3,6 +3,9 @@
 #include <stdint.h>
 #include <string>
 
+#include "xmrstak/jconf.hpp"
+#include "xmrstak/backend/cryptonight.hpp"
+
 typedef struct {
 	int device_id;
 	const char *device_name;
@@ -20,6 +23,7 @@ typedef struct {
 	uint32_t *d_result_nonce;
 	uint32_t *d_long_state;
 	uint32_t *d_ctx_state;
+	uint32_t *d_ctx_state2;
 	uint32_t *d_ctx_a;
 	uint32_t *d_ctx_b;
 	uint32_t *d_ctx_key1;
@@ -41,8 +45,8 @@ int cuda_get_devicecount( int* deviceCount);
 int cuda_get_deviceinfo(nvid_ctx *ctx);
 int cryptonight_extra_cpu_init(nvid_ctx *ctx);
 void cryptonight_extra_cpu_set_data( nvid_ctx* ctx, const void *data, uint32_t len);
-void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce);
-void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce, uint64_t target, uint32_t* rescount, uint32_t *resnonce);
+void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce, xmrstak_algo miner_algo, uint8_t version);
+void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce, uint64_t target, uint32_t* rescount, uint32_t *resnonce,xmrstak_algo miner_algo, uint8_t version);
 }
 
-void cryptonight_core_cpu_hash(nvid_ctx* ctx, bool mineMonero);
+void cryptonight_core_cpu_hash(nvid_ctx* ctx, xmrstak_algo miner_algo, uint32_t startNonce, uint8_t version);
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
index cc972740eda03a339f726c07d8c24a03cd37f71a..ede578f041f552089a223a1968a64cbc9218e9cf 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
@@ -6,6 +6,8 @@
 #include <cuda.h>
 #include <cuda_runtime.h>
 
+#include "xmrstak/jconf.hpp"
+
 #ifdef _WIN32
 #include <windows.h>
 extern "C" void compat_usleep(uint64_t waitTime)
@@ -106,8 +108,18 @@ __device__ __forceinline__ void storeGlobal32( T* addr, T const & val )
 #endif
 }
 
-template<size_t ITERATIONS, uint32_t THREAD_SHIFT>
-__global__ void cryptonight_core_gpu_phase1( int threads, int bfactor, int partidx, uint32_t * __restrict__ long_state, uint32_t * __restrict__ ctx_state, uint32_t * __restrict__ ctx_key1 )
+template< typename T >
+__device__ __forceinline__ void storeGlobal64( T* addr, T const & val )
+{
+#if (__CUDA_ARCH__ < 700)
+	asm volatile( "st.global.cg.u64 [%0], %1;" : : "l"( addr ), "l"( val ) );
+#else
+	*addr = val;
+#endif
+}
+
+template<size_t ITERATIONS, uint32_t MEMORY>
+__global__ void cryptonight_core_gpu_phase1( int threads, int bfactor, int partidx, uint32_t * __restrict__ long_state, uint32_t * __restrict__ ctx_state2, uint32_t * __restrict__ ctx_key1 )
 {
 	__shared__ uint32_t sharedMemory[1024];
 
@@ -117,7 +129,7 @@ __global__ void cryptonight_core_gpu_phase1( int threads, int bfactor, int parti
 	const int thread = ( blockDim.x * blockIdx.x + threadIdx.x ) >> 3;
 	const int sub = ( threadIdx.x & 7 ) << 2;
 
-	const int batchsize = ITERATIONS >> bfactor;
+	const int batchsize = MEMORY >> bfactor;
 	const int start = partidx * batchsize;
 	const int end = start + batchsize;
 
@@ -131,18 +143,18 @@ __global__ void cryptonight_core_gpu_phase1( int threads, int bfactor, int parti
 	if( partidx == 0 )
 	{
 		// first round
-		MEMCPY8( text, ctx_state + thread * 50 + sub + 16, 2 );
+		MEMCPY8( text, ctx_state2 + thread * 50 + sub + 16, 2 );
 	}
 	else
 	{
 		// load previous text data
-		MEMCPY8( text, &long_state[( (uint64_t) thread << THREAD_SHIFT ) + sub + start - 32], 2 );
+		MEMCPY8( text, &long_state[( (uint64_t) thread * MEMORY ) + sub + start - 32], 2 );
 	}
 	__syncthreads( );
 	for ( int i = start; i < end; i += 32 )
 	{
 		cn_aes_pseudo_round_mut( sharedMemory, text, key );
-		MEMCPY8(&long_state[((uint64_t) thread << THREAD_SHIFT) + (sub + i)], text, 2);
+		MEMCPY8(&long_state[((uint64_t) thread * MEMORY) + (sub + i)], text, 2);
 	}
 }
 
@@ -157,33 +169,37 @@ __forceinline__ __device__ void unusedVar( const T& )
  * - this method can be used with all compute architectures
  * - for <sm_30 shared memory is needed
  *
+ * group_n - must be a power of 2!
+ * 
  * @param ptr pointer to shared memory, size must be `threadIdx.x * sizeof(uint32_t)`
  *            value can be NULL for compute architecture >=sm_30
- * @param sub thread number within the group, range [0;4)
+ * @param sub thread number within the group, range [0:group_n]
  * @param value value to share with other threads within the group
- * @param src thread number within the group from where the data is read, range [0;4)
+ * @param src thread number within the group from where the data is read, range [0:group_n]
  */
+template<size_t group_n>
 __forceinline__ __device__ uint32_t shuffle(volatile uint32_t* ptr,const uint32_t sub,const int val,const uint32_t src)
 {
 #if( __CUDA_ARCH__ < 300 )
     ptr[sub] = val;
-    return ptr[src&3];
+    return ptr[src & (group_n-1)];
 #else
     unusedVar( ptr );
     unusedVar( sub );
 #   if(__CUDACC_VER_MAJOR__ >= 9)
-    return __shfl_sync(0xFFFFFFFF, val, src, 4 );
+    return __shfl_sync(0xFFFFFFFF, val, src, group_n );
 #	else
-	return __shfl( val, src, 4 );
+	return __shfl( val, src, group_n );
 #	endif
 #endif
 }
 
-template<size_t ITERATIONS, uint32_t THREAD_SHIFT, uint32_t MASK>
+template<size_t ITERATIONS, uint32_t MEMORY, uint32_t MASK, xmrstak_algo ALGO>
 #ifdef XMR_STAK_THREADS
 __launch_bounds__( XMR_STAK_THREADS * 4 )
 #endif
-__global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int partidx, uint32_t * d_long_state, uint32_t * d_ctx_a, uint32_t * d_ctx_b )
+__global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int partidx, uint32_t * d_long_state, uint32_t * d_ctx_a, uint32_t * d_ctx_b, uint32_t * d_ctx_state,
+		uint32_t startNonce, uint32_t * __restrict__ d_input )
 {
 	__shared__ uint32_t sharedMemory[1024];
 
@@ -192,6 +208,7 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti
 	__syncthreads( );
 
 	const int thread = ( blockDim.x * blockIdx.x + threadIdx.x ) >> 2;
+	const uint32_t nonce = startNonce + thread;
 	const int sub = threadIdx.x & 3;
 	const int sub2 = sub & 2;
 
@@ -205,30 +222,48 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti
 		return;
 
 	int i, k;
-        uint32_t j;
+	uint32_t j;
 	const int batchsize = (ITERATIONS * 2) >> ( 2 + bfactor );
 	const int start = partidx * batchsize;
 	const int end = start + batchsize;
-	uint32_t * long_state = &d_long_state[(IndexType) thread << THREAD_SHIFT];
-	uint32_t * ctx_a = d_ctx_a + thread * 4;
-	uint32_t * ctx_b = d_ctx_b + thread * 4;
-	uint32_t a, d[2];
+	uint32_t * long_state = &d_long_state[(IndexType) thread * MEMORY];
+	uint32_t a, d[2], idx0;
 	uint32_t t1[2], t2[2], res;
 
-	a = ctx_a[sub];
-	d[1] = ctx_b[sub];
+	uint32_t tweak1_2[2];
+	if (ALGO == cryptonight_monero)
+	{
+		uint32_t * state = d_ctx_state + thread * 50;
+		tweak1_2[0] = (d_input[8] >> 24) | (d_input[9] << 8);
+		tweak1_2[0] ^= state[48];
+		tweak1_2[1] = nonce;
+		tweak1_2[1] ^= state[49];
+	}
+
+	a = (d_ctx_a + thread * 4)[sub];
+	idx0 = shuffle<4>(sPtr,sub, a, 0);
+	if(ALGO == cryptonight_heavy)
+	{
+		if(partidx != 0)
+		{
+			// state is stored after all ctx_b states
+			idx0 = *(d_ctx_b + threads * 4 + thread);
+		}
+	}
+	d[1] = (d_ctx_b + thread * 4)[sub];
+
 	#pragma unroll 2
 	for ( i = start; i < end; ++i )
 	{
 		#pragma unroll 2
 		for ( int x = 0; x < 2; ++x )
 		{
-			j = ( ( shuffle(sPtr,sub, a, 0) & MASK ) >> 2 ) + sub;
+			j = ( ( idx0 & MASK ) >> 2 ) + sub;
 
 			const uint32_t x_0 = loadGlobal32<uint32_t>( long_state + j );
-			const uint32_t x_1 = shuffle(sPtr,sub, x_0, sub + 1);
-			const uint32_t x_2 = shuffle(sPtr,sub, x_0, sub + 2);
-			const uint32_t x_3 = shuffle(sPtr,sub, x_0, sub + 3);
+			const uint32_t x_1 = shuffle<4>(sPtr,sub, x_0, sub + 1);
+			const uint32_t x_2 = shuffle<4>(sPtr,sub, x_0, sub + 2);
+			const uint32_t x_3 = shuffle<4>(sPtr,sub, x_0, sub + 3);
 			d[x] = a ^
 				t_fn0( x_0 & 0xff ) ^
 				t_fn1( (x_1 >> 8) & 0xff ) ^
@@ -237,41 +272,74 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti
 
 
 			//XOR_BLOCKS_DST(c, b, &long_state[j]);
-			t1[0] = shuffle(sPtr,sub, d[x], 0);
-			//long_state[j] = d[0] ^ d[1];
-			storeGlobal32( long_state + j, d[0] ^ d[1] );
-
+			t1[0] = shuffle<4>(sPtr,sub, d[x], 0);
+
+			const uint32_t z = d[0] ^ d[1];
+			if(ALGO == cryptonight_monero)
+			{
+				const uint32_t table = 0x75310U;
+				const uint32_t index = ((z >> 26) & 12) | ((z >> 23) & 2);
+				const uint32_t fork_7 = z ^ ((table >> index) & 0x30U) << 24;
+				storeGlobal32( long_state + j, sub == 2 ? fork_7 : z );
+			}
+			else
+				storeGlobal32( long_state + j, z );
+			
 			//MUL_SUM_XOR_DST(c, a, &long_state[((uint32_t *)c)[0] & MASK]);
 			j = ( ( *t1 & MASK ) >> 2 ) + sub;
 
 			uint32_t yy[2];
 			*( (uint64_t*) yy ) = loadGlobal64<uint64_t>( ( (uint64_t *) long_state )+( j >> 1 ) );
 			uint32_t zz[2];
-			zz[0] = shuffle(sPtr,sub, yy[0], 0);
-			zz[1] = shuffle(sPtr,sub, yy[1], 0);
+			zz[0] = shuffle<4>(sPtr,sub, yy[0], 0);
+			zz[1] = shuffle<4>(sPtr,sub, yy[1], 0);
 
-			t1[1] = shuffle(sPtr,sub, d[x], 1);
+			t1[1] = shuffle<4>(sPtr,sub, d[x], 1);
 			#pragma unroll
 			for ( k = 0; k < 2; k++ )
-				t2[k] = shuffle(sPtr,sub, a, k + sub2);
+				t2[k] = shuffle<4>(sPtr,sub, a, k + sub2);
 
             *( (uint64_t *) t2 ) += sub2 ? ( *( (uint64_t *) t1 ) * *( (uint64_t*) zz ) ) : __umul64hi( *( (uint64_t *) t1 ), *( (uint64_t*) zz ) );
 
 			res = *( (uint64_t *) t2 )  >> ( sub & 1 ? 32 : 0 );
 
-			storeGlobal32( long_state + j, res );
+			
+			if(ALGO == cryptonight_monero)
+			{
+				const uint32_t tweaked_res = tweak1_2[sub & 1] ^ res;
+				const uint32_t long_state_update = sub2 ? tweaked_res : res;
+				storeGlobal32( long_state + j, long_state_update );
+			}
+			else
+				storeGlobal32( long_state + j, res );
+			
 			a = ( sub & 1 ? yy[1] : yy[0] ) ^ res;
+			idx0 = shuffle<4>(sPtr,sub, a, 0);
+			if(ALGO == cryptonight_heavy)
+			{
+				int64_t n = loadGlobal64<uint64_t>( ( (uint64_t *) long_state ) + (( idx0 & MASK ) >> 3));
+				int32_t d = loadGlobal32<uint32_t>( (uint32_t*)(( (uint64_t *) long_state ) + (( idx0 & MASK) >> 3) + 1u ));
+				int64_t q = n / (d | 0x5);
+
+				if(sub&1)
+					storeGlobal64<uint64_t>( ( (uint64_t *) long_state ) + (( idx0 & MASK ) >> 3), n ^ q );
+
+				idx0 = d ^ q;
+			}
 		}
 	}
 
 	if ( bfactor > 0 )
 	{
-		ctx_a[sub] = a;
-		ctx_b[sub] = d[1];
+		(d_ctx_a + thread * 4)[sub] = a;
+		(d_ctx_b + thread * 4)[sub] = d[1];
+		if(ALGO == cryptonight_heavy)
+			if(sub&1)
+				*(d_ctx_b + threads * 4 + thread) = idx0;
 	}
 }
 
-template<size_t ITERATIONS, uint32_t THREAD_SHIFT>
+template<size_t ITERATIONS, uint32_t MEMORY, xmrstak_algo ALGO>
 __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int partidx, const uint32_t * __restrict__ long_state, uint32_t * __restrict__ d_ctx_state, uint32_t * __restrict__ d_ctx_key2 )
 {
 	__shared__ uint32_t sharedMemory[1024];
@@ -280,9 +348,10 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti
 	__syncthreads( );
 
 	int thread = ( blockDim.x * blockIdx.x + threadIdx.x ) >> 3;
-	int sub = ( threadIdx.x & 7 ) << 2;
+	int subv = ( threadIdx.x & 7 );
+	int sub = subv << 2;
 
-	const int batchsize = ITERATIONS >> bfactor;
+	const int batchsize = MEMORY >> bfactor;
 	const int start = partidx * batchsize;
 	const int end = start + batchsize;
 
@@ -294,20 +363,53 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti
 	MEMCPY8( text, d_ctx_state + thread * 50 + sub + 16, 2 );
 
 	__syncthreads( );
+	
+#if( __CUDA_ARCH__ < 300 )
+        extern __shared__ uint32_t shuffleMem[];
+        volatile uint32_t* sPtr = (volatile uint32_t*)(shuffleMem + (threadIdx.x& 0xFFFFFFFC));
+#else
+        volatile uint32_t* sPtr = NULL;
+#endif
+
 	for ( int i = start; i < end; i += 32 )
 	{
 #pragma unroll
 		for ( int j = 0; j < 4; ++j )
-			text[j] ^= long_state[((IndexType) thread << THREAD_SHIFT) + (sub + i + j)];
+			text[j] ^= long_state[((IndexType) thread * MEMORY) + ( sub + i + j)];
 
 		cn_aes_pseudo_round_mut( sharedMemory, text, key );
+		
+		if(ALGO == cryptonight_heavy)
+		{
+#pragma unroll
+			for ( int j = 0; j < 4; ++j )
+				text[j] ^= shuffle<8>(sPtr, subv, text[j], (subv+1)&7);
+		}
+	}
+
+	if(ALGO == cryptonight_heavy)
+	{
+		__syncthreads( );
+		
+		for ( int i = start; i < end; i += 32 )
+		{
+#pragma unroll
+			for ( int j = 0; j < 4; ++j )
+				text[j] ^= long_state[((IndexType) thread * MEMORY) + ( sub + i + j)];
+
+			cn_aes_pseudo_round_mut( sharedMemory, text, key );
+
+#pragma unroll
+			for ( int j = 0; j < 4; ++j )
+				text[j] ^= shuffle<8>(sPtr, subv, text[j], (subv+1)&7);
+		}
 	}
 
 	MEMCPY8( d_ctx_state + thread * 50 + sub + 16, text, 2 );
 }
 
-template<size_t ITERATIONS, uint32_t MASK, uint32_t THREAD_SHIFT>
-void cryptonight_core_gpu_hash(nvid_ctx* ctx)
+template<size_t ITERATIONS, uint32_t MASK, uint32_t MEMORY, xmrstak_algo ALGO>
+void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce)
 {
 	dim3 grid( ctx->device_blocks );
 	dim3 block( ctx->device_threads );
@@ -329,9 +431,11 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx)
 
 	for ( int i = 0; i < partcountOneThree; i++ )
 	{
-		CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase1<ITERATIONS,THREAD_SHIFT><<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads,
+		CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase1<ITERATIONS,MEMORY><<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads,
 			bfactorOneThree, i,
-			ctx->d_long_state, ctx->d_ctx_state, ctx->d_ctx_key1 ));
+			ctx->d_long_state, 
+			(ALGO == cryptonight_heavy ? ctx->d_ctx_state2 : ctx->d_ctx_state),
+			ctx->d_ctx_key1 ));
 
 		if ( partcount > 1 && ctx->device_bsleep > 0) compat_usleep( ctx->device_bsleep );
 	}
@@ -342,7 +446,7 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx)
         CUDA_CHECK_MSG_KERNEL(
 			ctx->device_id,
 			"\n**suggestion: Try to increase the value of the attribute 'bfactor' or \nreduce 'threads' in the NVIDIA config file.**",
-			cryptonight_core_gpu_phase2<ITERATIONS,THREAD_SHIFT,MASK><<<
+			cryptonight_core_gpu_phase2<ITERATIONS,MEMORY,MASK,ALGO><<<
 				grid,
 				block4,
 				block4.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 )
@@ -352,7 +456,10 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx)
 				i,
 				ctx->d_long_state,
 				ctx->d_ctx_a,
-				ctx->d_ctx_b
+				ctx->d_ctx_b,
+				ctx->d_ctx_state,
+				nonce,
+				ctx->d_input
 			)
 	    );
 
@@ -361,25 +468,39 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx)
 
 	for ( int i = 0; i < partcountOneThree; i++ )
 	{
-		CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ITERATIONS,THREAD_SHIFT><<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads,
+		CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ITERATIONS,MEMORY, ALGO><<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads,
 			bfactorOneThree, i,
 			ctx->d_long_state,
 			ctx->d_ctx_state, ctx->d_ctx_key2 ));
 	}
 }
 
-void cryptonight_core_cpu_hash(nvid_ctx* ctx, bool mineMonero)
+void cryptonight_core_cpu_hash(nvid_ctx* ctx, xmrstak_algo miner_algo, uint32_t startNonce, uint8_t version)
 {
-#ifndef CONF_NO_MONERO
-	if(mineMonero)
+
+	if(miner_algo == cryptonight_monero)
 	{
-		cryptonight_core_gpu_hash<MONERO_ITER, MONERO_MASK, 19u>(ctx);
+		if(version >= 7)
+			cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight_monero>(ctx, startNonce);
+		else
+			cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight>(ctx, startNonce);
 	}
-#endif
-#ifndef CONF_NO_AEON
-	if(!mineMonero)
+	else if(miner_algo == cryptonight_heavy)
 	{
-		cryptonight_core_gpu_hash<AEON_ITER, AEON_MASK, 18u>(ctx);
+		if(version >= 3)
+			cryptonight_core_gpu_hash<CRYPTONIGHT_HEAVY_ITER, CRYPTONIGHT_HEAVY_MASK, CRYPTONIGHT_HEAVY_MEMORY/4, cryptonight_heavy>(ctx, startNonce);
+		else
+		{
+			cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight>(ctx, startNonce);
+		}
 	}
-#endif
+	else if(miner_algo == cryptonight)
+	{
+		cryptonight_core_gpu_hash<CRYPTONIGHT_ITER, CRYPTONIGHT_MASK, CRYPTONIGHT_MEMORY/4, cryptonight>(ctx, startNonce);
+	}
+	else if(miner_algo == cryptonight_lite)
+	{
+		cryptonight_core_gpu_hash<CRYPTONIGHT_LITE_ITER, CRYPTONIGHT_LITE_MASK, CRYPTONIGHT_LITE_MEMORY/4, cryptonight_lite>(ctx, startNonce);
+	}
+
 }
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index 92259db5f4bf59ec8bdfedd22909030ad04ae442..2f08a1a4b19f770b5908dc389cf0944315110554 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -28,6 +28,7 @@ uint64_t keccakf_rndc[24] ={
 typedef unsigned char BitSequence;
 typedef unsigned long long DataLength;
 
+#include "xmrstak/backend/cryptonight.hpp"
 #include "cryptonight.hpp"
 #include "cuda_extra.hpp"
 #include "cuda_keccak.hpp"
@@ -36,6 +37,7 @@ typedef unsigned long long DataLength;
 #include "cuda_jh.hpp"
 #include "cuda_skein.hpp"
 #include "cuda_device.hpp"
+#include "cuda_aes.hpp"
 
 __constant__ uint8_t d_sub_byte[16][16] ={
 	{0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 },
@@ -90,10 +92,33 @@ __device__ __forceinline__ void cryptonight_aes_set_key( uint32_t * __restrict__
 	}
 }
 
-__global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restrict__ d_input, uint32_t len, uint32_t startNonce, uint32_t * __restrict__ d_ctx_state, uint32_t * __restrict__ d_ctx_a, uint32_t * __restrict__ d_ctx_b, uint32_t * __restrict__ d_ctx_key1, uint32_t * __restrict__ d_ctx_key2 )
+__device__ __forceinline__ void mix_and_propagate( uint32_t* state )
+{
+	uint32_t tmp0[4];
+	for(size_t x = 0; x < 4; ++x)
+		tmp0[x] = (state)[x];
+
+	// set destination [0,6]
+	for(size_t t = 0; t < 7; ++t)
+		for(size_t x = 0; x < 4; ++x)
+			(state + 4 * t)[x] = (state + 4 * t)[x] ^ (state + 4 * (t + 1))[x];
+
+	// set destination 7
+	for(size_t x = 0; x < 4; ++x)
+		(state + 4 * 7)[x] = (state + 4 * 7)[x] ^ tmp0[x];
+}
+
+template<xmrstak_algo ALGO>
+__global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restrict__ d_input, uint32_t len, uint32_t startNonce, uint32_t * __restrict__ d_ctx_state, uint32_t * __restrict__ d_ctx_state2, uint32_t * __restrict__ d_ctx_a, uint32_t * __restrict__ d_ctx_b, uint32_t * __restrict__ d_ctx_key1, uint32_t * __restrict__ d_ctx_key2 )
 {
 	int thread = ( blockDim.x * blockIdx.x + threadIdx.x );
+	__shared__ uint32_t sharedMemory[1024];
 
+	if(ALGO == cryptonight_heavy)
+	{
+		cn_aes_gpu_init( sharedMemory );
+		__syncthreads( );
+	}
 	if ( thread >= threads )
 		return;
 
@@ -113,20 +138,45 @@ __global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restric
 	cn_keccak( (uint8_t *) input, len, (uint8_t *) ctx_state );
 	cryptonight_aes_set_key( ctx_key1, ctx_state );
 	cryptonight_aes_set_key( ctx_key2, ctx_state + 8 );
+
 	XOR_BLOCKS_DST( ctx_state, ctx_state + 8, ctx_a );
 	XOR_BLOCKS_DST( ctx_state + 4, ctx_state + 12, ctx_b );
-
-	memcpy( d_ctx_state + thread * 50, ctx_state, 50 * 4 );
 	memcpy( d_ctx_a + thread * 4, ctx_a, 4 * 4 );
 	memcpy( d_ctx_b + thread * 4, ctx_b, 4 * 4 );
+
 	memcpy( d_ctx_key1 + thread * 40, ctx_key1, 40 * 4 );
 	memcpy( d_ctx_key2 + thread * 40, ctx_key2, 40 * 4 );
+	memcpy( d_ctx_state + thread * 50, ctx_state, 50 * 4 );
+
+	if(ALGO == cryptonight_heavy)
+	{
+
+		for(int i=0; i < 16; i++)
+		{
+			for(size_t t = 4; t < 12; ++t)
+			{
+				cn_aes_pseudo_round_mut( sharedMemory, ctx_state + 4u * t, ctx_key1 );
+			}
+			// scipt first 4 * 128bit blocks = 4 * 4 uint32_t values
+			mix_and_propagate(ctx_state + 4 * 4);
+		}
+		// double buffer to move manipulated state into phase1
+		memcpy( d_ctx_state2 + thread * 50, ctx_state, 50 * 4 );
+	}
 }
 
-__global__ void cryptonight_extra_gpu_final( int threads, uint64_t target, uint32_t* __restrict__ d_res_count, uint32_t * __restrict__ d_res_nonce, uint32_t * __restrict__ d_ctx_state )
+template<xmrstak_algo ALGO>
+__global__ void cryptonight_extra_gpu_final( int threads, uint64_t target, uint32_t* __restrict__ d_res_count, uint32_t * __restrict__ d_res_nonce, uint32_t * __restrict__ d_ctx_state,uint32_t * __restrict__ d_ctx_key2 )
 {
 	const int thread = blockDim.x * blockIdx.x + threadIdx.x;
 
+	__shared__ uint32_t sharedMemory[1024];
+
+	if(ALGO == cryptonight_heavy)
+	{
+		cn_aes_gpu_init( sharedMemory );
+		__syncthreads( );
+	}
 	if ( thread >= threads )
 		return;
 
@@ -134,11 +184,28 @@ __global__ void cryptonight_extra_gpu_final( int threads, uint64_t target, uint3
 	uint32_t * __restrict__ ctx_state = d_ctx_state + thread * 50;
 	uint64_t hash[4];
 	uint32_t state[50];
-
-#pragma unroll
+	
+	#pragma unroll
 	for ( i = 0; i < 50; i++ )
 		state[i] = ctx_state[i];
 
+	if(ALGO == cryptonight_heavy)
+	{
+		uint32_t key[40];
+
+		// load keys
+		MEMCPY8( key, d_ctx_key2 + thread * 40, 20 );
+
+		for(int i=0; i < 16; i++)
+		{
+			for(size_t t = 4; t < 12; ++t)
+			{
+				cn_aes_pseudo_round_mut( sharedMemory, state + 4u * t, key );
+			}
+			// scipt first 4 * 128bit blocks = 4 * 4 uint32_t values
+			mix_and_propagate(state + 4 * 4);
+		}
+	}
 	cn_keccakf2( (uint64_t *) state );
 
 	switch ( ( (uint8_t *) state )[0] & 0x03 )
@@ -212,23 +279,26 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
 	if(gpuArch < 70)
 		CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 
-	size_t hashMemSize;
-	if(::jconf::inst()->IsCurrencyMonero())
-	{
-		hashMemSize = MONERO_MEMORY;
-	}
-	else
-	{
-		hashMemSize = AEON_MEMORY;
-	}
+	size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
 
 	size_t wsize = ctx->device_blocks * ctx->device_threads;
 	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state, 50 * sizeof(uint32_t) * wsize));
+	size_t ctx_b_size = 4 * sizeof(uint32_t) * wsize;
+	if(cryptonight_heavy == ::jconf::inst()->GetMiningAlgo())
+	{
+		// extent ctx_b to hold the state of idx0
+		ctx_b_size += sizeof(uint32_t) * wsize;
+		// create a double buffer for the state to exchange the mixed state to phase1
+		CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state2, 50 * sizeof(uint32_t) * wsize));
+	}
+	else
+		ctx->d_ctx_state2 = ctx->d_ctx_state;
+	
 	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key1, 40 * sizeof(uint32_t) * wsize));
 	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key2, 40 * sizeof(uint32_t) * wsize));
 	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_text, 32 * sizeof(uint32_t) * wsize));
 	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_a, 4 * sizeof(uint32_t) * wsize));
-	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_b, 4 * sizeof(uint32_t) * wsize));
+	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_b, ctx_b_size));
 	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_input, 21 * sizeof (uint32_t ) ));
 	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_count, sizeof (uint32_t ) ));
 	CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_nonce, 10 * sizeof (uint32_t ) ));
@@ -239,7 +309,7 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
 	return 1;
 }
 
-extern "C" void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce)
+extern "C" void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce, xmrstak_algo miner_algo, uint8_t version)
 {
 	int threadsperblock = 128;
 	uint32_t wsize = ctx->device_blocks * ctx->device_threads;
@@ -247,11 +317,22 @@ extern "C" void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce
 	dim3 grid( ( wsize + threadsperblock - 1 ) / threadsperblock );
 	dim3 block( threadsperblock );
 
-	CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<<<grid, block >>>( wsize, ctx->d_input, ctx->inputlen, startNonce,
-		ctx->d_ctx_state, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2 ));
+	if(miner_algo == cryptonight_heavy && version >= 3)
+	{
+		CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<cryptonight_heavy><<<grid, block >>>( wsize, ctx->d_input, ctx->inputlen, startNonce,
+			ctx->d_ctx_state,ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2 ));
+	}
+	else
+	{
+		/* pass two times d_ctx_state because the second state is used later in phase1,
+		 * the first is used than in phase3
+		 */
+		CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<invalid_algo><<<grid, block >>>( wsize, ctx->d_input, ctx->inputlen, startNonce,
+			ctx->d_ctx_state, ctx->d_ctx_state, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2 ));
+	}
 }
 
-extern "C" void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce, uint64_t target, uint32_t* rescount, uint32_t *resnonce)
+extern "C" void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce, uint64_t target, uint32_t* rescount, uint32_t *resnonce,xmrstak_algo miner_algo, uint8_t version)
 {
 	int threadsperblock = 128;
 	uint32_t wsize = ctx->device_blocks * ctx->device_threads;
@@ -262,11 +343,23 @@ extern "C" void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce,
 	CUDA_CHECK(ctx->device_id, cudaMemset( ctx->d_result_nonce, 0xFF, 10 * sizeof (uint32_t ) ));
 	CUDA_CHECK(ctx->device_id, cudaMemset( ctx->d_result_count, 0, sizeof (uint32_t ) ));
 
-	CUDA_CHECK_MSG_KERNEL(
-		ctx->device_id,
-		"\n**suggestion: Try to increase the value of the attribute 'bfactor' in the NVIDIA config file.**",
-		cryptonight_extra_gpu_final<<<grid, block >>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state )
-	);
+	if(miner_algo == cryptonight_heavy && version >= 3)
+	{
+		CUDA_CHECK_MSG_KERNEL(
+			ctx->device_id,
+			"\n**suggestion: Try to increase the value of the attribute 'bfactor' in the NVIDIA config file.**",
+			cryptonight_extra_gpu_final<cryptonight_heavy><<<grid, block >>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state,ctx->d_ctx_key2 )
+		);
+	}
+	else
+	{
+		// fallback for all other algorithms
+		CUDA_CHECK_MSG_KERNEL(
+			ctx->device_id,
+			"\n**suggestion: Try to increase the value of the attribute 'bfactor' in the NVIDIA config file.**",
+			cryptonight_extra_gpu_final<invalid_algo><<<grid, block >>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state,ctx->d_ctx_key2 )
+		);
+	}
 
 	CUDA_CHECK(ctx->device_id, cudaMemcpy( rescount, ctx->d_result_count, sizeof (uint32_t ), cudaMemcpyDeviceToHost ));
 	CUDA_CHECK(ctx->device_id, cudaMemcpy( resnonce, ctx->d_result_nonce, 10 * sizeof (uint32_t ), cudaMemcpyDeviceToHost ));
@@ -482,15 +575,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 		ctx->total_device_memory = totalMemory;
 		ctx->free_device_memory = freeMemory;
 
-		size_t hashMemSize;
-		if(::jconf::inst()->IsCurrencyMonero())
-		{
-			hashMemSize = MONERO_MEMORY;
-		}
-		else
-		{
-			hashMemSize = AEON_MEMORY;
-		}
+		size_t hashMemSize = cn_select_memory(::jconf::inst()->GetMiningAlgo());
 
 #ifdef WIN32
 		/* We use in windows bfactor (split slow kernel into smaller parts) to avoid
@@ -520,6 +605,9 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 		// up to 16kibyte extra memory is used per thread for some kernel (lmem/local memory)
 		// 680bytes are extra meta data memory per hash
 		size_t perThread = hashMemSize + 16192u + 680u;
+		if(cryptonight_heavy == ::jconf::inst()->GetMiningAlgo())
+			perThread += 50 * 4; // state double buffer
+		
 		size_t max_intensity = limitedMemory / perThread;
 		ctx->device_threads = max_intensity / ctx->device_blocks;
 		// use only odd number of threads
diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp
index 9053844bd60af072c5cc160aebd9dc6e41fc432a..e2d50f2913e2c23960e5a3192abd72930c56a434 100644
--- a/xmrstak/cli/cli-miner.cpp
+++ b/xmrstak/cli/cli-miner.cpp
@@ -55,7 +55,7 @@
 #	include "xmrstak/misc/uac.hpp"
 #endif // _WIN32
 
-void do_benchmark();
+int do_benchmark(int block_version);
 
 void help()
 {
@@ -64,47 +64,50 @@ void help()
 
 	cout<<"Usage: "<<params::inst().binaryName<<" [OPTION]..."<<endl;
 	cout<<" "<<endl;
-	cout<<"  -h, --help            show this help"<<endl;
-	cout<<"  -v, --version         show version number"<<endl;
-	cout<<"  -V, --version-long    show long version number"<<endl;
-	cout<<"  -c, --config FILE     common miner configuration file"<<endl;
+	cout<<"  -h, --help                 show this help"<<endl;
+	cout<<"  -v, --version              show version number"<<endl;
+	cout<<"  -V, --version-long         show long version number"<<endl;
+	cout<<"  -c, --config FILE          common miner configuration file"<<endl;
+	cout<<"  -C, --poolconf FILE        pool configuration file"<<endl;
 #ifdef _WIN32
-	cout<<"  --noUAC               disable the UAC dialog"<<endl;
-#endif
-#if (!defined(CONF_NO_AEON)) && (!defined(CONF_NO_MONERO))
-	cout<<"  --currency NAME       currency to mine: monero or aeon"<<endl;
+	cout<<"  --noUAC                    disable the UAC dialog"<<endl;
 #endif
+	cout<<"  --currency NAME            currency to mine:"<<endl;
+	cout<<"  --benchmark BLOCKVERSION   ONLY do a 60-second benchmark and exit"<<endl;
 #ifndef CONF_NO_CPU
-	cout<<"  --noCPU               disable the CPU miner backend"<<endl;
-	cout<<"  --cpu FILE            CPU backend miner config file"<<endl;
+	cout<<"  --noCPU                    disable the CPU miner backend"<<endl;
+	cout<<"  --cpu FILE                 CPU backend miner config file"<<endl;
 #endif
 #ifndef CONF_NO_OPENCL
-	cout<<"  --noAMD               disable the AMD miner backend"<<endl;
-	cout<<"  --amd FILE            AMD backend miner config file"<<endl;
+	cout<<"  --noAMD                    disable the AMD miner backend"<<endl;
+	cout<<"  --amd FILE                 AMD backend miner config file"<<endl;
 #endif
 #ifndef CONF_NO_CUDA
-	cout<<"  --noNVIDIA            disable the NVIDIA miner backend"<<endl;
-	cout<<"  --nvidia FILE         NVIDIA backend miner config file"<<endl;
+	cout<<"  --noNVIDIA                 disable the NVIDIA miner backend"<<endl;
+	cout<<"  --nvidia FILE              NVIDIA backend miner config file"<<endl;
 #endif
 #ifndef CONF_NO_HTTPD
-	cout<<"  -i --httpd HTTP_PORT  HTTP interface port"<<endl;
+	cout<<"  -i --httpd HTTP_PORT       HTTP interface port"<<endl;
 #endif
 	cout<<" "<<endl;
 	cout<<"The following options can be used for automatic start without a guided config,"<<endl;
 	cout<<"If config exists then this pool will be top priority."<<endl;
-	cout<<"  -o, --url URL         pool url and port, e.g. pool.usxmrpool.com:3333"<<endl;
-	cout<<"  -O, --tls-url URL     TLS pool url and port, e.g. pool.usxmrpool.com:10443"<<endl;
-	cout<<"  -u, --user USERNAME   pool user name or wallet address"<<endl;
-	cout<<"  -r, --rigid RIGID     rig identifier for pool-side statistics (needs pool support)"<<endl;
-	cout<<"  -p, --pass PASSWD     pool password, in the most cases x or empty \"\""<<endl;
-	cout<<"  --use-nicehash        the pool should run in nicehash mode"<<endl;
-	cout<<" \n"<<endl;
+	cout<<"  -o, --url URL              pool url and port, e.g. pool.usxmrpool.com:3333"<<endl;
+	cout<<"  -O, --tls-url URL          TLS pool url and port, e.g. pool.usxmrpool.com:10443"<<endl;
+	cout<<"  -u, --user USERNAME        pool user name or wallet address"<<endl;
+	cout<<"  -r, --rigid RIGID          rig identifier for pool-side statistics (needs pool support)"<<endl;
+	cout<<"  -p, --pass PASSWD          pool password, in the most cases x or empty \"\""<<endl;
+	cout<<"  --use-nicehash             the pool should run in nicehash mode"<<endl;
+	cout<< endl;
 #ifdef _WIN32
 	cout<<"Environment variables:\n"<<endl;
-	cout<<"  XMRSTAK_NOWAIT        disable the dialog `Press any key to exit."<<std::endl;
-	cout<<"                	       for non UAC execution"<<endl;
-	cout<<" \n"<<endl;
+	cout<<"  XMRSTAK_NOWAIT             disable the dialog `Press any key to exit."<<std::endl;
+	cout<<"                	            for non UAC execution"<<endl;
+	cout<< endl;
 #endif
+	std::string algos;
+	jconf::GetAlgoList(algos);
+	cout<< "Supported coin opitons: " << endl << algos << endl; 
 	cout<< "Version: " << get_version_str_short() << endl;
 	cout<<"Brought to by fireice_uk and psychocrypt under GPLv3."<<endl;
 }
@@ -133,10 +136,7 @@ std::string get_multipool_entry(bool& final)
 	std::cout<<std::endl<<"- Next Pool:"<<std::endl<<std::endl;
 
 	std::string pool;
-	if(xmrstak::params::inst().currency == "monero")
-		std::cout<<"- Pool address: e.g. pool.usxmrpool.com:3333"<<std::endl;
-	else
-		std::cout<<"- Pool address: e.g. mine.aeon-pool.com:5555"<<std::endl;
+	std::cout<<"- Pool address: e.g. " << jconf::GetDefaultPool(xmrstak::params::inst().currency.c_str()) << std::endl;
 	std::cin >> pool;
 
 	std::string userName;
@@ -149,7 +149,6 @@ std::string get_multipool_entry(bool& final)
 	getline(std::cin, passwd);
 	
 	std::string rigid;
-	std::cin.clear(); std::cin.ignore(INT_MAX,'\n');
 	std::cout<<"- Rig identifier for pool-side statistics (needs pool support). Can be empty:"<<std::endl;
 	getline(std::cin, rigid);
 
@@ -185,61 +184,36 @@ inline void prompt_once(bool& prompted)
 	}
 }
 
-void do_guided_config()
+void do_guided_pool_config()
 {
 	using namespace xmrstak;
 
 	// load the template of the backend config into a char variable
 	const char *tpl =
-		#include "../config.tpl"
+		#include "../pools.tpl"
 	;
 
 	configEditor configTpl{};
 	configTpl.set(std::string(tpl));
 	bool prompted = false;
-	
+
 	auto& currency = params::inst().currency;
-	if(currency.empty())
+	if(currency.empty() || !jconf::IsOnAlgoList(currency))
 	{
 		prompt_once(prompted);
 
 		std::string tmp;
-#if defined(CONF_NO_AEON)
-		tmp = "monero";
-#elif defined(CONF_NO_MONERO)
-		tmp = "aeon";
-#endif
-		while(tmp != "monero" && tmp != "aeon")
+		while(tmp.empty() || !jconf::IsOnAlgoList(tmp))
 		{
-			std::cout<<"- Currency: 'monero' or 'aeon'"<<std::endl;
+			std::string list;
+			jconf::GetAlgoList(list);
+			std::cout << "- Please enter the currency that you want to mine: "<<std::endl;
+			std::cout << list << std::endl;
 			std::cin >> tmp;
-			std::transform(tmp.begin(), tmp.end(), tmp.begin(), ::tolower);
 		} 
 		currency = tmp;
 	}
 
-	auto& http_port = params::inst().httpd_port;
-	if(http_port == params::httpd_port_unset)
-	{
-#if defined(CONF_NO_HTTPD)
-		http_port = params::httpd_port_disabled;
-#else
-		std::cout<<"- Do you want to use the HTTP interface?" <<std::endl;
-		std::cout<<"Unlike the screen display, browser interface is not affected by the GPU lag." <<std::endl;
-		std::cout<<"If you don't want to use it, please enter 0, otherwise enter port number that the miner should listen on" <<std::endl;
-
-		int32_t port;
-		while(!(std::cin >> port) || port < 0 || port > 65535)
-		{
-			std::cin.clear();
-			std::cin.ignore(INT_MAX, '\n');
-			std::cout << "Invalid port number. Please enter a number between 0 and 65535." << std::endl;
-		}
-
-		http_port = port;
-#endif
-	}
-
 	auto& pool = params::inst().poolURL;
 	bool userSetPool = true;
 	if(pool.empty())
@@ -247,10 +221,7 @@ void do_guided_config()
 		prompt_once(prompted);
 
 		userSetPool = false;
-		if(currency == "monero")
-			std::cout<<"- Pool address: e.g. pool.usxmrpool.com:3333"<<std::endl;
-		else
-			std::cout<<"- Pool address: e.g. mine.aeon-pool.com:5555"<<std::endl;
+		std::cout<<"- Pool address: e.g. " << jconf::GetDefaultPool(xmrstak::params::inst().currency.c_str()) << std::endl;
 		std::cin >> pool;
 	}
 
@@ -263,6 +234,7 @@ void do_guided_config()
 		std::cin >> userName;
 	}
 
+	bool stdin_flushed = false;
 	auto& passwd = params::inst().poolPasswd;
 	if(passwd.empty() && !params::inst().userSetPwd)
 	{
@@ -270,6 +242,8 @@ void do_guided_config()
 
 		// clear everything from stdin to allow an empty password
 		std::cin.clear(); std::cin.ignore(INT_MAX,'\n');
+		stdin_flushed = true;
+
 		std::cout<<"- Password (mostly empty or x):"<<std::endl;
 		getline(std::cin, passwd);
 	}
@@ -279,8 +253,12 @@ void do_guided_config()
 	{
 		prompt_once(prompted);
 
-		// clear everything from stdin to allow an empty rigid
-		std::cin.clear(); std::cin.ignore(INT_MAX,'\n');
+		if(!stdin_flushed)
+		{
+			// clear everything from stdin to allow an empty rigid
+			std::cin.clear(); std::cin.ignore(INT_MAX,'\n');
+		}
+
 		std::cout<<"- Rig identifier for pool-side statistics (needs pool support). Can be empty:"<<std::endl;
 		getline(std::cin, rigid);
 	}
@@ -346,8 +324,49 @@ void do_guided_config()
 		while(!final);
 	}
 
-	configTpl.replace("POOLCONF", pool_table);
 	configTpl.replace("CURRENCY", currency);
+	configTpl.replace("POOLCONF", pool_table);
+	configTpl.write(params::inst().configFilePools);
+	std::cout<<"Pool configuration stored in file '"<<params::inst().configFilePools<<"'"<<std::endl;
+}
+
+void do_guided_config()
+{
+	using namespace xmrstak;
+
+	// load the template of the backend config into a char variable
+	const char *tpl =
+		#include "../config.tpl"
+	;
+
+	configEditor configTpl{};
+	configTpl.set(std::string(tpl));
+	bool prompted = false;
+
+	auto& http_port = params::inst().httpd_port;
+	if(http_port == params::httpd_port_unset)
+	{
+#if defined(CONF_NO_HTTPD)
+		http_port = params::httpd_port_disabled;
+#else
+		prompt_once(prompted);
+
+		std::cout<<"- Do you want to use the HTTP interface?" <<std::endl;
+		std::cout<<"Unlike the screen display, browser interface is not affected by the GPU lag." <<std::endl;
+		std::cout<<"If you don't want to use it, please enter 0, otherwise enter port number that the miner should listen on" <<std::endl;
+
+		int32_t port;
+		while(!(std::cin >> port) || port < 0 || port > 65535)
+		{
+			std::cin.clear();
+			std::cin.ignore(INT_MAX, '\n');
+			std::cout << "Invalid port number. Please enter a number between 0 and 65535." << std::endl;
+		}
+
+		http_port = port;
+#endif
+	}
+
 	configTpl.replace("HTTP_PORT", std::to_string(http_port));
 	configTpl.write(params::inst().configFile);
 	std::cout<<"Configuration stored in file '"<<params::inst().configFile<<"'"<<std::endl;
@@ -574,6 +593,17 @@ int main(int argc, char *argv[])
 			}
 			params::inst().configFile = argv[i];
 		}
+		else if(opName.compare("-C") == 0 || opName.compare("--poolconf") == 0)
+		{
+			++i;
+			if( i >=argc )
+			{
+				printer::inst()->print_msg(L0, "No argument for parameter '-C/--poolconf' given");
+				win_exit();
+				return 1;
+			}
+			params::inst().configFilePools = argv[i];
+		}
 		else if(opName.compare("-i") == 0 || opName.compare("--httpd") == 0)
 		{
 			++i;
@@ -600,6 +630,25 @@ int main(int argc, char *argv[])
 		{
 			params::inst().allowUAC = false;
 		}
+		else if(opName.compare("--benchmark") == 0)
+		{
+			++i;
+			if( i >= argc )
+			{
+				printer::inst()->print_msg(L0, "No argument for parameter '--benchmark' given");
+				win_exit();
+				return 1;
+			}
+			char* block_version = nullptr;
+			long int bversion = strtol(argv[i], &block_version, 10);
+
+			if(bversion < 0 || bversion >= 256)
+			{
+				printer::inst()->print_msg(L0, "Benchmark block version must be in the range [0,255]");
+				return 1;
+			}
+			params::inst().benchmark_block_version = bversion;
+		}
 		else
 		{
 			printer::inst()->print_msg(L0, "Parameter unknown '%s'",argv[i]);
@@ -612,7 +661,10 @@ int main(int argc, char *argv[])
 	if(!configEditor::file_exist(params::inst().configFile))
 		do_guided_config();
 
-	if(!jconf::inst()->parse_config(params::inst().configFile.c_str()))
+	if(!configEditor::file_exist(params::inst().configFilePools))
+		do_guided_pool_config();
+
+	if(!jconf::inst()->parse_config(params::inst().configFile.c_str(), params::inst().configFilePools.c_str()))
 	{
 		win_exit();
 		return 1;
@@ -670,11 +722,14 @@ int main(int argc, char *argv[])
 	printer::inst()->print_str("'r' - results\n");
 	printer::inst()->print_str("'c' - connection\n");
 	printer::inst()->print_str("-------------------------------------------------------------------\n");
-	if(::jconf::inst()->IsCurrencyMonero())
-		printer::inst()->print_msg(L0,"Start mining: MONERO");
-	else
-		printer::inst()->print_msg(L0,"Start mining: AEON");
+	printer::inst()->print_msg(L0, "Mining coin: %s", jconf::inst()->GetMiningCoin().c_str());
 
+	if(params::inst().benchmark_block_version >= 0)
+	{
+		printer::inst()->print_str("!!!! Doing only a benchmark and exiting. To mine, remove the '--benchmark' option. !!!!\n");
+		return do_benchmark(params::inst().benchmark_block_version);
+	}
+	
 	executor::inst()->ex_start(jconf::inst()->DaemonMode());
 
 	uint64_t lastTime = get_timestamp_ms();
@@ -709,23 +764,31 @@ int main(int argc, char *argv[])
 	return 0;
 }
 
-void do_benchmark()
+int do_benchmark(int block_version)
 {
 	using namespace std::chrono;
 	std::vector<xmrstak::iBackend*>* pvThreads;
 
-	printer::inst()->print_msg(L0, "Running a 60 second benchmark...");
+	printer::inst()->print_msg(L0, "Prepare benchmark for block version %d", block_version);
+
+	uint8_t work[112];
+	memset(work,0,112);
+	work[0] = static_cast<uint8_t>(block_version);
 
-	uint8_t work[76] = {0};
-	xmrstak::miner_work oWork = xmrstak::miner_work("", work, sizeof(work), 0, false, 0);
+	xmrstak::pool_data dat;
+
+	xmrstak::miner_work oWork = xmrstak::miner_work();
 	pvThreads = xmrstak::BackendConnector::thread_starter(oWork);
 
+	printer::inst()->print_msg(L0, "Wait 30 sec until all backends are initialized");
+	std::this_thread::sleep_for(std::chrono::seconds(30));
+
+	xmrstak::miner_work benchWork = xmrstak::miner_work("", work, sizeof(work), 0, false, 0);
+	printer::inst()->print_msg(L0, "Start a 60 second benchmark...");
+	xmrstak::globalStates::inst().switch_work(benchWork, dat);
 	uint64_t iStartStamp = get_timestamp_ms();
 
 	std::this_thread::sleep_for(std::chrono::seconds(60));
-
-	oWork = xmrstak::miner_work();
-	xmrstak::pool_data dat;
 	xmrstak::globalStates::inst().switch_work(oWork, dat);
 
 	double fTotalHps = 0.0;
@@ -734,9 +797,13 @@ void do_benchmark()
 		double fHps = pvThreads->at(i)->iHashCount;
 		fHps /= (pvThreads->at(i)->iTimestamp - iStartStamp) / 1000.0;
 
-		printer::inst()->print_msg(L0, "Thread %u: %.1f H/S", i, fHps);
+		auto bType = static_cast<xmrstak::iBackend::BackendType>(pvThreads->at(i)->backendType);
+		std::string name(xmrstak::iBackend::getName(bType));
+
+		printer::inst()->print_msg(L0, "Benchmark Thread %u %s: %.1f H/S", i,name.c_str(), fHps);
 		fTotalHps += fHps;
 	}
 
-	printer::inst()->print_msg(L0, "Total: %.1f H/S", fTotalHps);
+	printer::inst()->print_msg(L0, "Benchmark Total: %.1f H/S", fTotalHps);
+	return 0;
 }
diff --git a/xmrstak/config.tpl b/xmrstak/config.tpl
index 451ea7bc57cbaf82d2390a734704e72478a90873..c95d142679f55e79186b5a82f5ca9949469cdf20 100644
--- a/xmrstak/config.tpl
+++ b/xmrstak/config.tpl
@@ -1,27 +1,4 @@
 R"===(
-/*
- * pool_address    - Pool address should be in the form "pool.supportxmr.com:3333". Only stratum pools are supported.
- * wallet_address  - Your wallet, or pool login.
- * rig_id          - Rig identifier for pool-side statistics (needs pool support).
- * pool_password   - Can be empty in most cases or "x".
- * use_nicehash    - Limit the nonce to 3 bytes as required by nicehash.
- * use_tls         - This option will make us connect using Transport Layer Security.
- * tls_fingerprint - Server's SHA256 fingerprint. If this string is non-empty then we will check the server's cert against it.
- * pool_weight     - Pool weight is a number telling the miner how important the pool is. Miner will mine mostly at the pool 
- *                   with the highest weight, unless the pool fails. Weight must be an integer larger than 0.
- *
- * We feature pools up to 1MH/s. For a more complete list see M5M400's pool list at www.moneropools.com
- */
-"pool_list" :
-[
-POOLCONF],
-
-/*
- * currency to mine
- * allowed values: 'monero' or 'aeon'
- */
-"currency" : "CURRENCY",
-
 /*
  * Network timeouts.
  * Because of the way this client is written it doesn't need to constantly talk (keep-alive) to the server to make 
diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp
index c9d3a20c92d8d7adabc3024920a4d770ae1bf708..225fbe031ff09090cd6697b93416f61aa6aa9e97 100644
--- a/xmrstak/jconf.cpp
+++ b/xmrstak/jconf.cpp
@@ -51,7 +51,7 @@ using namespace rapidjson;
  * This enum needs to match index in oConfigValues, otherwise we will get a runtime error
  */
 enum configEnum {
-	aPoolList, bTlsSecureAlgo, sCurrency, iCallTimeout, iNetRetry, iGiveUpLimit, iVerboseLevel, bPrintMotd, iAutohashTime, 
+	aPoolList, sCurrency, bTlsSecureAlgo, iCallTimeout, iNetRetry, iGiveUpLimit, iVerboseLevel, bPrintMotd, iAutohashTime, 
 	bFlushStdout, bDaemonMode, sOutputFile, iHttpdPort, sHttpLogin, sHttpPass, bPreferIpv4, bAesOverride, sUseSlowMem 
 };
 
@@ -65,8 +65,8 @@ struct configVal {
 // kNullType means any type
 configVal oConfigValues[] = {
 	{ aPoolList, "pool_list", kArrayType },
-	{ bTlsSecureAlgo, "tls_secure_algo", kTrueType },
 	{ sCurrency, "currency", kStringType },
+	{ bTlsSecureAlgo, "tls_secure_algo", kTrueType },
 	{ iCallTimeout, "call_timeout", kNumberType },
 	{ iNetRetry, "retry_time", kNumberType },
 	{ iGiveUpLimit, "giveup_limit", kNumberType },
@@ -86,6 +86,28 @@ configVal oConfigValues[] = {
 
 constexpr size_t iConfigCnt = (sizeof(oConfigValues)/sizeof(oConfigValues[0]));
 
+struct xmrstak_coin_algo
+{
+	const char* coin_name;
+	xmrstak_algo algo;
+	const char* default_pool;
+};
+
+xmrstak_coin_algo coin_algos[] = { 
+	{ "aeon", cryptonight_lite, "mine.aeon-pool.com:5555" },
+	{ "cryptonight", cryptonight, nullptr },
+	{ "cryptonight_lite", cryptonight_lite, nullptr },
+	{ "edollar", cryptonight, nullptr },
+	{ "electroneum", cryptonight, nullptr },
+	{ "graft", cryptonight, nullptr },
+	{ "intense", cryptonight, nullptr },
+	{ "karbo", cryptonight, nullptr },
+	{ "monero2", cryptonight_monero, "pool.usxmrpool.com:3333" },
+	{ "sumokoin", cryptonight_heavy, nullptr }
+};
+
+constexpr size_t coin_alogo_size = (sizeof(coin_algos)/sizeof(coin_algos[0]));
+
 inline bool checkType(Type have, Type want)
 {
 	if(want == have)
@@ -103,6 +125,7 @@ inline bool checkType(Type have, Type want)
 struct jconf::opaque_private
 {
 	Document jsonDoc;
+	Document jsonDocPools;
 	const Value* configValues[iConfigCnt]; //Compile time constant
 
 	opaque_private()
@@ -168,45 +191,6 @@ bool jconf::TlsSecureAlgos()
 	return prv->configValues[bTlsSecureAlgo]->GetBool();
 }
 
-const std::string jconf::GetCurrency()
-{
-	auto& currency = xmrstak::params::inst().currency;
-	if(currency.empty())
-		currency = prv->configValues[sCurrency]->GetString();
-	if(
-#ifndef CONF_NO_MONERO
-			// if monero is disabled at compile time, enable error message if selected currency is `monero`
-			!xmrstak::strcmp_i(currency, "monero")
-#else
-			true
-#endif
-			&&
-#ifndef CONF_NO_AEON
-			// if aeon is disabled at compile time, enable error message if selected currency is `aeon`
-			!xmrstak::strcmp_i(currency, "aeon")
-#else
-			true
-#endif
-	)
-	{
-		printer::inst()->print_msg(L0, "ERROR: Wrong currency selected - '%s'.", currency.c_str());
-		win_exit();
-	}
-	return currency;
-}
-
-bool jconf::IsCurrencyMonero()
-{
-	if(xmrstak::strcmp_i(GetCurrency(), "monero"))
-	{
-		return true;
-	}
-	else
-	{
-		return false;
-	}
-}
-
 bool jconf::PreferIpv4()
 {
 	return prv->configValues[bPreferIpv4]->GetBool();
@@ -312,18 +296,68 @@ jconf::slow_mem_cfg jconf::GetSlowMemSetting()
 		return unknown_value;
 }
 
-bool jconf::parse_config(const char* sFilename)
+std::string jconf::GetMiningCoin()
 {
-	FILE * pFile;
-	char * buffer;
-	size_t flen;
+	if(xmrstak::params::inst().currency.length() > 0)
+		return xmrstak::params::inst().currency;
+	else
+		return prv->configValues[sCurrency]->GetString();
+}
 
-	if(!check_cpu_features())
+void jconf::GetAlgoList(std::string& list)
+{
+	list.reserve(256);
+	for(size_t i=0; i < coin_alogo_size; i++)
 	{
-		printer::inst()->print_msg(L0, "CPU support of SSE2 is required.");
+		list += "\t- ";
+		list += coin_algos[i].coin_name;
+		list += "\n";
+	}
+}
+
+bool jconf::IsOnAlgoList(std::string& needle)
+{
+	std::transform(needle.begin(), needle.end(), needle.begin(), ::tolower);
+	
+	if(needle == "monero")
+	{
+		printer::inst()->print_msg(L0, "You entered Monero as coin name. Monero will hard-fork the PoW.\nThis means it will stop being compatible with other cryptonight coins.\n"
+			"Please use monero2 if you want to mine Monero, or name the coin that you want to mine.");
 		return false;
 	}
 
+	for(size_t i=0; i < coin_alogo_size; i++)
+	{
+		if(needle == coin_algos[i].coin_name)
+			return true;
+	}
+	return false;
+}
+
+const char* jconf::GetDefaultPool(const char* needle)
+{
+	const char* default_example = "pool.example.com:3333";
+	
+	for(size_t i=0; i < coin_alogo_size; i++)
+	{
+		if(strcmp(needle, coin_algos[i].coin_name) == 0)
+		{
+			if(coin_algos[i].default_pool != nullptr)
+				return coin_algos[i].default_pool;
+			else
+				return default_example;
+		}
+	}
+
+	return default_example;
+}
+
+bool jconf::parse_file(const char* sFilename, bool main_conf)
+{
+	FILE * pFile;
+	char * buffer;
+	size_t flen;
+
 	pFile = fopen(sFilename, "rb");
 	if (pFile == NULL)
 	{
@@ -372,46 +406,92 @@ bool jconf::parse_config(const char* sFilename)
 	buffer[flen] = '}';
 	buffer[flen + 1] = '\0';
 
-	prv->jsonDoc.Parse<kParseCommentsFlag|kParseTrailingCommasFlag>(buffer, flen+2);
+	Document& root = main_conf ? prv->jsonDoc : prv->jsonDocPools;
+
+	root.Parse<kParseCommentsFlag|kParseTrailingCommasFlag>(buffer, flen+2);
 	free(buffer);
 
-	if(prv->jsonDoc.HasParseError())
+	if(root.HasParseError())
 	{
-		printer::inst()->print_msg(L0, "JSON config parse error(offset %llu): %s",
-			int_port(prv->jsonDoc.GetErrorOffset()), GetParseError_En(prv->jsonDoc.GetParseError()));
+		printer::inst()->print_msg(L0, "JSON config parse error in '%s' (offset %llu): %s",
+			sFilename, int_port(root.GetErrorOffset()), GetParseError_En(root.GetParseError()));
 		return false;
 	}
 
-
-	if(!prv->jsonDoc.IsObject())
+	if(!root.IsObject())
 	{ //This should never happen as we created the root ourselves
-		printer::inst()->print_msg(L0, "Invalid config file. No root?\n");
+		printer::inst()->print_msg(L0, "Invalid config file '%s'. No root?", sFilename);
 		return false;
 	}
 
-	for(size_t i = 0; i < iConfigCnt; i++)
+	if(main_conf)
 	{
-		if(oConfigValues[i].iName != i)
+		for(size_t i = 2; i < iConfigCnt; i++)
 		{
-			printer::inst()->print_msg(L0, "Code error. oConfigValues are not in order.");
-			return false;
-		}
+			if(oConfigValues[i].iName != i)
+			{
+				printer::inst()->print_msg(L0, "Code error. oConfigValues are not in order.");
+				return false;
+			}
 
-		prv->configValues[i] = GetObjectMember(prv->jsonDoc, oConfigValues[i].sName);
+			prv->configValues[i] = GetObjectMember(root, oConfigValues[i].sName);
 
-		if(prv->configValues[i] == nullptr)
-		{
-			printer::inst()->print_msg(L0, "Invalid config file. Missing value \"%s\".", oConfigValues[i].sName);
-			return false;
-		}
+			if(prv->configValues[i] == nullptr)
+			{
+				printer::inst()->print_msg(L0, "Invalid config file '%s'. Missing value \"%s\".", sFilename, oConfigValues[i].sName);
+				return false;
+			}
 
-		if(!checkType(prv->configValues[i]->GetType(), oConfigValues[i].iType))
+			if(!checkType(prv->configValues[i]->GetType(), oConfigValues[i].iType))
+			{
+				printer::inst()->print_msg(L0, "Invalid config file '%s'. Value \"%s\" has unexpected type.", sFilename, oConfigValues[i].sName);
+				return false;
+			}
+		}
+	}
+	else
+	{
+		for(size_t i = 0; i < 2; i++)
 		{
-			printer::inst()->print_msg(L0, "Invalid config file. Value \"%s\" has unexpected type.", oConfigValues[i].sName);
-			return false;
+			if(oConfigValues[i].iName != i)
+			{
+				printer::inst()->print_msg(L0, "Code error. oConfigValues are not in order.");
+				return false;
+			}
+
+			prv->configValues[i] = GetObjectMember(root, oConfigValues[i].sName);
+
+			if(prv->configValues[i] == nullptr)
+			{
+				printer::inst()->print_msg(L0, "Invalid config file '%s'. Missing value \"%s\".", sFilename, oConfigValues[i].sName);
+				return false;
+			}
+
+			if(!checkType(prv->configValues[i]->GetType(), oConfigValues[i].iType))
+			{
+				printer::inst()->print_msg(L0, "Invalid config file '%s'. Value \"%s\" has unexpected type.", sFilename, oConfigValues[i].sName);
+				return false;
+			}
 		}
 	}
 
+	return true;
+}
+
+bool jconf::parse_config(const char* sFilename, const char* sFilenamePools)
+{
+	if(!check_cpu_features())
+	{
+		printer::inst()->print_msg(L0, "CPU support of SSE2 is required.");
+		return false;
+	}
+
+	if(!parse_file(sFilename, true))
+		return false;
+
+	if(!parse_file(sFilenamePools, false))
+		return false;
+
 	size_t pool_cnt = prv->configValues[aPoolList]->Size();
 	if(pool_cnt == 0)
 	{
@@ -529,5 +609,38 @@ bool jconf::parse_config(const char* sFilename)
 		}
 	}
 
+	std::string ctmp = GetMiningCoin();
+	std::transform(ctmp.begin(), ctmp.end(), ctmp.begin(), ::tolower);
+
+	if(ctmp.length() == 0)
+	{
+		printer::inst()->print_msg(L0, "You need to specify the coin that you want to mine.");
+		return false;
+	}
+
+	for(size_t i=0; i < coin_alogo_size; i++)
+	{
+		if(ctmp == "monero")
+		{
+			printer::inst()->print_msg(L0, "You entered Monero as coin name. Monero will hard-fork the PoW.\nThis means it will stop being compatible with other cryptonight coins.\n"
+				"Please use monero2 if you want to mine Monero, or name the coin that you want to mine.");
+			return false;
+		}
+
+		if(ctmp == coin_algos[i].coin_name)
+		{
+			mining_algo = coin_algos[i].algo;
+			break;
+		}
+	}
+
+	if(mining_algo == invalid_algo)
+	{
+		std::string cl;
+		GetAlgoList(cl);
+		printer::inst()->print_msg(L0, "Unrecognised coin '%s', your options are:\n%s", ctmp.c_str(), cl.c_str());
+		return false;
+	}
+
 	return true;
 }
diff --git a/xmrstak/jconf.hpp b/xmrstak/jconf.hpp
index 9a4e958f6f1972d2cdf0f2cb3ccd3a03d9689727..6874d37ba3a27777c59b5cc34bb99133a0ceee91 100644
--- a/xmrstak/jconf.hpp
+++ b/xmrstak/jconf.hpp
@@ -1,12 +1,12 @@
 #pragma once
 
+#include "xmrstak/backend/cryptonight.hpp"
 #include "xmrstak/misc/environment.hpp"
 #include "params.hpp"
 
 #include <stdlib.h>
 #include <string>
 
-
 class jconf
 {
 public:
@@ -18,7 +18,7 @@ public:
 		return env.pJconfConfig;
 	};
 
-	bool parse_config(const char* sFilename = xmrstak::params::inst().configFile.c_str());
+	bool parse_config(const char* sFilename, const char* sFilenamePools);
 
 	struct pool_cfg {
 		const char* sPoolAddr;
@@ -48,8 +48,13 @@ public:
 
 	bool TlsSecureAlgos();
 
-	const std::string GetCurrency();
-	bool IsCurrencyMonero();
+	inline xmrstak_algo GetMiningAlgo() { return mining_algo; }
+	
+	std::string GetMiningCoin();
+
+	static void GetAlgoList(std::string& list);
+	static bool IsOnAlgoList(std::string& needle);
+	static const char* GetDefaultPool(const char* needle);
 
 	uint64_t GetVerboseLevel();
 	bool PrintMotd();
@@ -78,9 +83,12 @@ public:
 private:
 	jconf();
 
+	bool parse_file(const char* sFilename, bool main_conf);
+
 	bool check_cpu_features();
 	struct opaque_private;
 	opaque_private* prv;
 
 	bool bHaveAes;
+	xmrstak_algo mining_algo;
 };
diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp
index c4ba26ead532dfcaaea1175f269bbafc8789543e..a62017347ab6f4be340ed4cb62fe89b231249f9a 100644
--- a/xmrstak/misc/executor.cpp
+++ b/xmrstak/misc/executor.cpp
@@ -329,11 +329,14 @@ void executor::on_sock_ready(size_t pool_id)
 
 	if(!pool->cmd_login())
 	{
-		if(!pool->have_sock_error())
+		if(pool->have_call_error() && !pool->is_dev_pool())
 		{
-			log_socket_error(pool, pool->get_call_error());
-			pool->disconnect();
+			std::string str = "Login error: " +  pool->get_call_error();
+			log_socket_error(pool, std::move(str));
 		}
+		
+		if(!pool->have_sock_error())
+			pool->disconnect();
 	}
 }
 
@@ -406,14 +409,19 @@ void executor::on_pool_have_job(size_t pool_id, pool_job& oPoolJob)
 void executor::on_miner_result(size_t pool_id, job_result& oResult)
 {
 	jpsock* pool = pick_pool_by_id(pool_id);
-	bool is_monero = jconf::inst()->IsCurrencyMonero();
+
+	const char* backend_name = xmrstak::iBackend::getName(pvThreads->at(oResult.iThreadId)->backendType);
+	uint64_t backend_hashcount, total_hashcount = 0;
+
+	backend_hashcount = pvThreads->at(oResult.iThreadId)->iHashCount.load(std::memory_order_relaxed);
+	for(size_t i = 0; i < pvThreads->size(); i++)
+		total_hashcount += pvThreads->at(i)->iHashCount.load(std::memory_order_relaxed);
 
 	if(pool->is_dev_pool())
 	{
 		//Ignore errors silently
 		if(pool->is_running() && pool->is_logged_in())
-			pool->cmd_submit(oResult.sJobID, oResult.iNonce, oResult.bResult, pvThreads->at(oResult.iThreadId), is_monero);
-
+			pool->cmd_submit(oResult.sJobID, oResult.iNonce, oResult.bResult, backend_name, backend_hashcount, total_hashcount, jconf::inst()->GetMiningAlgo());
 		return;
 	}
 
@@ -424,7 +432,7 @@ void executor::on_miner_result(size_t pool_id, job_result& oResult)
 	}
 
 	size_t t_start = get_timestamp_ms();
-	bool bResult = pool->cmd_submit(oResult.sJobID, oResult.iNonce, oResult.bResult, pvThreads->at(oResult.iThreadId), is_monero);
+	bool bResult = pool->cmd_submit(oResult.sJobID, oResult.iNonce, oResult.bResult, backend_name, backend_hashcount, total_hashcount, jconf::inst()->GetMiningAlgo());
 	size_t t_len = get_timestamp_ms() - t_start;
 
 	if(t_len > 0xFFFF)
@@ -540,19 +548,38 @@ void executor::ex_main()
 		pools.emplace_back(i+1, params.poolURL.c_str(), params.poolUsername.c_str(), params.poolRigid.c_str(), params.poolPasswd.c_str(), 9.9, false, params.poolUseTls, "", params.nicehashMode);
 	}
 
-	if(jconf::inst()->IsCurrencyMonero())
+	switch(jconf::inst()->GetMiningAlgo())
 	{
+	case cryptonight_heavy:
 		if(dev_tls)
-			pools.emplace_front(0, "donate.xmr-stak.net:6666", "", "", "", 0.0, true, true, "", false);
+			pools.emplace_front(0, "donate.xmr-stak.net:8888", "", "", "", 0.0, true, true, "", true);
 		else
-			pools.emplace_front(0, "donate.xmr-stak.net:3333", "", "", "", 0.0, true, false, "", false);
-	}
-	else
-	{
+			pools.emplace_front(0, "donate.xmr-stak.net:5555", "", "", "", 0.0, true, false, "", true);
+		break;
+	
+	case cryptonight_monero:
+		if(dev_tls)
+			pools.emplace_front(0, "donate.xmr-stak.net:8800", "", "", "", 0.0, true, true, "", false);
+		else
+			pools.emplace_front(0, "donate.xmr-stak.net:5500", "", "", "", 0.0, true, false, "", false);
+		break;
+	
+	case cryptonight_lite:
 		if(dev_tls)
 			pools.emplace_front(0, "donate.xmr-stak.net:7777", "", "", "", 0.0, true, true, "", true);
 		else
 			pools.emplace_front(0, "donate.xmr-stak.net:4444", "", "", "", 0.0, true, false, "", true);
+		break;
+
+	case cryptonight:
+		if(dev_tls)
+			pools.emplace_front(0, "donate.xmr-stak.net:6666", "", "", "", 0.0, true, true, "", false);
+		else
+			pools.emplace_front(0, "donate.xmr-stak.net:3333", "", "", "", 0.0, true, false, "", false);
+		break;
+
+	default:
+		break;
 	}
 
 	ex_event ev;
diff --git a/xmrstak/net/jpsock.cpp b/xmrstak/net/jpsock.cpp
index 9c413dcd353f916c1e992b8f39fa0827c5f24b87..95bcc9c8599646bb88bc9916a01a8d7ba0a99315 100644
--- a/xmrstak/net/jpsock.cpp
+++ b/xmrstak/net/jpsock.cpp
@@ -24,6 +24,7 @@
 #include <stdarg.h>
 #include <assert.h>
 #include <algorithm>
+#include <chrono>
 
 #include "jpsock.hpp"
 #include "socks.hpp"
@@ -133,6 +134,7 @@ jpsock::~jpsock()
 
 std::string&& jpsock::get_call_error()
 {
+	call_error = false;
 	return std::move(prv->oCallRsp.sCallErr);
 }
 
@@ -189,11 +191,25 @@ bool jpsock::set_socket_error_strerr(const char* a, int res)
 void jpsock::jpsock_thread()
 {
 	jpsock_thd_main();
+
+	if(!bHaveSocketError)
+		set_socket_error("Socket closed.");
+
 	executor::inst()->push_event(ex_event(std::move(sSocketError), quiet_close, pool_id));
 
-	// If a call is wating, send an error to end it
-	bool bCallWaiting = false;
 	std::unique_lock<std::mutex> mlock(call_mutex);
+	bool bWait = prv->oCallRsp.pCallData != nullptr;
+
+	// If a call is waiting, wait a little bit before blowing it out of the water
+	if(bWait)
+	{
+		mlock.unlock();
+		std::this_thread::sleep_for(std::chrono::milliseconds(500));
+		mlock.lock();
+	}
+
+	// If the call is still there send an error to end it
+	bool bCallWaiting = false;
 	if(prv->oCallRsp.pCallData != nullptr)
 	{
 		prv->oCallRsp.bHaveResponse = true;
@@ -348,6 +364,7 @@ bool jpsock::process_line(char* line, size_t len)
 		{
 			prv->oCallRsp.pCallData = nullptr;
 			prv->oCallRsp.sCallErr.assign(sError, iErrorLn);
+			call_error = true;
 		}
 		else
 			prv->oCallRsp.pCallData->CopyFrom(*mt, prv->callAllocator);
@@ -440,6 +457,7 @@ bool jpsock::connect(std::string& sConnectError)
 {
 	ext_algo = ext_backend = ext_hashcount = ext_motd = false;
 	bHaveSocketError = false;
+	call_error = false;
 	sSocketError.clear();
 	iJobDiff = 0;
 	connect_attempts++;
@@ -596,7 +614,7 @@ bool jpsock::cmd_login()
 	return true;
 }
 
-bool jpsock::cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bResult, xmrstak::iBackend* bend, bool algo_full_cn)
+bool jpsock::cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bResult, const char* backend_name, uint64_t backend_hashcount, uint64_t total_hashcount, xmrstak_algo algo)
 {
 	char cmd_buffer[1024];
 	char sNonce[9];
@@ -604,16 +622,35 @@ bool jpsock::cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bRes
 	/*Extensions*/
 	char sAlgo[64] = {0};
 	char sBackend[64] = {0};
-	char sHashcount[64] = {0};
+	char sHashcount[128] = {0};
 
 	if(ext_backend)
-		snprintf(sBackend, sizeof(sBackend), ",\"backend\":\"%s\"", xmrstak::iBackend::getName(bend->backendType));
+		snprintf(sBackend, sizeof(sBackend), ",\"backend\":\"%s\"", backend_name);
 
 	if(ext_hashcount)
-		snprintf(sHashcount, sizeof(sHashcount), ",\"hashcount\":%llu", int_port(bend->iHashCount.load(std::memory_order_relaxed)));
+		snprintf(sHashcount, sizeof(sHashcount), ",\"hashcount\":%llu,\"hashcount_total\":%llu", int_port(backend_hashcount), int_port(total_hashcount));
 
 	if(ext_algo)
-		snprintf(sAlgo, sizeof(sAlgo), ",\"algo\":\"%s\"", algo_full_cn ? "cryptonight" : "cryptonight-lite");
+	{
+		const char* algo_name;
+		switch(algo)
+		{
+		case cryptonight:
+			algo_name = "cryptonight";
+			break;
+		case cryptonight_lite:
+			algo_name = "cryptonight-lite";
+			break;
+		case cryptonight_monero:
+			algo_name = "cryptonight-monero";
+			break;
+		default:
+			algo_name = "unknown";
+			break;
+		}
+
+		snprintf(sAlgo, sizeof(sAlgo), ",\"algo\":\"%s\"", algo_name);
+	}
 
 	bin2hex((unsigned char*)&iNonce, 4, sNonce);
 	sNonce[8] = '\0';
diff --git a/xmrstak/net/jpsock.hpp b/xmrstak/net/jpsock.hpp
index d9e554215a99afcd9af3de6fb4f54a2567e5c084..2ddeeee29cf6da67e97386cbe2bb228d484cc5ec 100644
--- a/xmrstak/net/jpsock.hpp
+++ b/xmrstak/net/jpsock.hpp
@@ -2,6 +2,7 @@
 
 #include "xmrstak/backend/iBackend.hpp"
 #include "msgstruct.hpp"
+#include "xmrstak/jconf.hpp"
 
 #include <mutex>
 #include <atomic>
@@ -34,7 +35,7 @@ public:
 	void disconnect(bool quiet = false);
 
 	bool cmd_login();
-	bool cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bResult, xmrstak::iBackend* bend, bool algo_full_cn);
+	bool cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bResult, const char* backend_name, uint64_t backend_hashcount, uint64_t total_hashcount, xmrstak_algo algo);
 
 	static bool hex2bin(const char* in, unsigned int len, unsigned char* out);
 	static void bin2hex(const unsigned char* in, unsigned int len, char* out);
@@ -62,6 +63,7 @@ public:
 	bool get_pool_motd(std::string& strin);
 
 	std::string&& get_call_error();
+	bool have_call_error() { return call_error; }
 	bool have_sock_error() { return bHaveSocketError; }
 
 	inline static uint64_t t32_to_t64(uint32_t t) { return 0xFFFFFFFFFFFFFFFFULL / (0xFFFFFFFFULL / ((uint64_t)t)); }
@@ -106,6 +108,7 @@ private:
 	std::atomic<bool> bRunning;
 	std::atomic<bool> bLoggedIn;
 	std::atomic<bool> quiet_close;
+	std::atomic<bool> call_error;
 
 	uint8_t* bJsonRecvMem;
 	uint8_t* bJsonParseMem;
diff --git a/xmrstak/net/socket.cpp b/xmrstak/net/socket.cpp
index 89e9902613f2690ae2f30e766f2c061083f4471c..7c58a8e0674cedb5d9c9030cdfc337dda9878d8c 100644
--- a/xmrstak/net/socket.cpp
+++ b/xmrstak/net/socket.cpp
@@ -48,6 +48,7 @@ bool plain_socket::set_hostname(const char* sAddr)
 	char sAddrMb[256];
 	char *sTmp, *sPort;
 
+	sock_closed = false;
 	size_t ln = strlen(sAddr);
 	if (ln >= sizeof(sAddrMb))
 		return pCallback->set_socket_error("CONNECT error: Pool address overflow.");
@@ -117,11 +118,16 @@ bool plain_socket::set_hostname(const char* sAddr)
 		return pCallback->set_socket_error_strerr("CONNECT error: Socket creation failed ");
 	}
 
+	int flag = 1;
+	/* If it fails, it fails, we won't loose too much sleep over it */
+	setsockopt(hSocket, IPPROTO_TCP, TCP_NODELAY, (char *) &flag, sizeof(int));
+
 	return true;
 }
 
 bool plain_socket::connect()
 {
+	sock_closed = false;
 	int ret = ::connect(hSocket, pSockAddr->ai_addr, (int)pSockAddr->ai_addrlen);
 
 	freeaddrinfo(pAddrRoot);
@@ -135,6 +141,9 @@ bool plain_socket::connect()
 
 int plain_socket::recv(char* buf, unsigned int len)
 {
+	if(sock_closed)
+		return 0;
+
 	int ret = ::recv(hSocket, buf, len, 0);
 
 	if(ret == 0)
@@ -167,6 +176,7 @@ void plain_socket::close(bool free)
 {
 	if(hSocket != INVALID_SOCKET)
 	{
+		sock_closed = true;
 		sock_close(hSocket);
 		hSocket = INVALID_SOCKET;
 	}
@@ -211,12 +221,13 @@ void tls_socket::init_ctx()
 
 	if(jconf::inst()->TlsSecureAlgos())
 	{
-		SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 | SSL_OP_NO_TLSv1 | SSL_OP_NO_COMPRESSION);
+		SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 | SSL_OP_NO_TLSv1);
 	}
 }
 
 bool tls_socket::set_hostname(const char* sAddr)
 {
+	sock_closed = false;
 	if(ctx == nullptr)
 	{
 		init_ctx();
@@ -233,6 +244,10 @@ bool tls_socket::set_hostname(const char* sAddr)
 		return false;
 	}
 
+	int flag = 1;
+	/* If it fails, it fails, we won't loose too much sleep over it */
+	setsockopt(BIO_get_fd(bio, nullptr), IPPROTO_TCP, TCP_NODELAY, (char *) &flag, sizeof(int));
+
 	if(BIO_set_conn_hostname(bio, sAddr) != 1)
 	{
 		print_error();
@@ -248,7 +263,7 @@ bool tls_socket::set_hostname(const char* sAddr)
 
 	if(jconf::inst()->TlsSecureAlgos())
 	{
-		if(SSL_set_cipher_list(ssl, "HIGH:!aNULL:!kRSA:!PSK:!SRP:!MD5:!RC4:!SHA1") != 1)
+		if(SSL_set_cipher_list(ssl, "HIGH:!aNULL:!PSK:!SRP:!MD5:!RC4:!SHA1") != 1)
 		{
 			print_error();
 			return false;
@@ -260,6 +275,7 @@ bool tls_socket::set_hostname(const char* sAddr)
 
 bool tls_socket::connect()
 {
+	sock_closed = false;
 	if(BIO_do_connect(bio) != 1)
 	{
 		print_error();
@@ -340,6 +356,9 @@ bool tls_socket::connect()
 
 int tls_socket::recv(char* buf, unsigned int len)
 {
+	if(sock_closed)
+		return 0;
+
 	int ret = BIO_read(bio, buf, len);
 
 	if(ret == 0)
@@ -360,6 +379,7 @@ void tls_socket::close(bool free)
 	if(bio == nullptr || ssl == nullptr)
 		return;
 
+	sock_closed = true;
 	if(!free)
 	{
 		sock_close(BIO_get_fd(bio, nullptr));
diff --git a/xmrstak/net/socket.hpp b/xmrstak/net/socket.hpp
index 192a32c2ea98c81e53f5eae0c0c19312ff2ccd1b..b09142d565a8080d3030eba1951f1309c8611d30 100644
--- a/xmrstak/net/socket.hpp
+++ b/xmrstak/net/socket.hpp
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <atomic>
 #include "socks.hpp"
 
 class jpsock;
@@ -12,6 +13,9 @@ public:
 	virtual int recv(char* buf, unsigned int len) = 0;
 	virtual bool send(const char* buf) = 0;
 	virtual void close(bool free) = 0;
+
+protected:
+	std::atomic<bool> sock_closed;
 };
 
 class plain_socket : public base_socket
diff --git a/xmrstak/net/socks.hpp b/xmrstak/net/socks.hpp
index 1d25d3ab7ec08b0aae9ca1fd946218e98abe041e..86749e52773dc557d16c5e283d9c9e5c33dab63e 100644
--- a/xmrstak/net/socks.hpp
+++ b/xmrstak/net/socks.hpp
@@ -62,9 +62,8 @@ inline const char* sock_gai_strerror(int err, char* buf, size_t len)
 #include <unistd.h> /* Needed for close() */
 #include <errno.h>
 #include <string.h>
-#if defined(__FreeBSD__)
 #include <netinet/in.h> /* Needed for IPPROTO_TCP */
-#endif
+#include <netinet/tcp.h>
 
 inline void sock_init() {}
 typedef int SOCKET;
diff --git a/xmrstak/params.hpp b/xmrstak/params.hpp
index bed34276013e8219292e535a9e097ccf856742c2..6928df5eba7231fd7cc1fe67036dda908c1b8567 100644
--- a/xmrstak/params.hpp
+++ b/xmrstak/params.hpp
@@ -40,6 +40,7 @@ struct params
 	std::string currency;
 
 	std::string configFile;
+	std::string configFilePools;
 	std::string configFileAMD;
 	std::string configFileNVIDIA;
 	std::string configFileCPU;
@@ -48,6 +49,9 @@ struct params
 	std::string minerArg0;
 	std::string minerArgs;
 
+	// block_version >= 0 enable benchmark
+	int benchmark_block_version = -1;
+
 	params() :
 		binaryName("xmr-stak"),
 		executablePrefix(""),
@@ -55,6 +59,7 @@ struct params
 		useNVIDIA(true),
 		useCPU(true),
 		configFile("config.txt"),
+		configFilePools("pools.txt"),
 		configFileAMD("amd.txt"),
 		configFileCPU("cpu.txt"),
 		configFileNVIDIA("nvidia.txt")
diff --git a/xmrstak/pools.tpl b/xmrstak/pools.tpl
new file mode 100644
index 0000000000000000000000000000000000000000..0b7084fb1aa03eda9962cef003885d0f803f2653
--- /dev/null
+++ b/xmrstak/pools.tpl
@@ -0,0 +1,39 @@
+R"===(
+/*
+ * pool_address    - Pool address should be in the form "pool.supportxmr.com:3333". Only stratum pools are supported.
+ * wallet_address  - Your wallet, or pool login.
+ * rig_id          - Rig identifier for pool-side statistics (needs pool support).
+ * pool_password   - Can be empty in most cases or "x".
+ * use_nicehash    - Limit the nonce to 3 bytes as required by nicehash.
+ * use_tls         - This option will make us connect using Transport Layer Security.
+ * tls_fingerprint - Server's SHA256 fingerprint. If this string is non-empty then we will check the server's cert against it.
+ * pool_weight     - Pool weight is a number telling the miner how important the pool is. Miner will mine mostly at the pool 
+ *                   with the highest weight, unless the pool fails. Weight must be an integer larger than 0.
+ *
+ * We feature pools up to 1MH/s. For a more complete list see M5M400's pool list at www.moneropools.com
+ */
+ 
+"pool_list" :
+[
+POOLCONF],
+
+/*
+ * Currency to mine. Supported values:
+ *
+ *    aeon
+ *    cryptonight (try this if your coin is not listed)
+ *    cryptonight_lite
+ *    edollar
+ *    electroneum
+ *    graft
+ *    intense
+ *    karbo
+ *    monero2 (use this for Monero's new PoW)
+ *    sumokoin
+ *
+ */
+
+"currency" : "CURRENCY",
+
+)==="
+		
diff --git a/xmrstak/version.cpp b/xmrstak/version.cpp
index 770834ef803a18775d7b73d4088b50e01eb16946..1444b30bcf7bd3a6085971b58200823706416bee 100644
--- a/xmrstak/version.cpp
+++ b/xmrstak/version.cpp
@@ -32,13 +32,7 @@
 #define OS_TYPE "unk"
 #endif
 
-#if defined(CONF_NO_AEON)
-#define COIN_TYPE "monero"
-#elif defined(CONF_NO_MONERO)
-#define COIN_TYPE "aeon"
-#else
-#define COIN_TYPE "aeon-monero"
-#endif
+#define COIN_TYPE "aeon-cryptonight-monero"
 
 #define XMRSTAK_PP_TOSTRING1(str) #str
 #define XMRSTAK_PP_TOSTRING(str) XMRSTAK_PP_TOSTRING1(str)