diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop.S b/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop.S
new file mode 100644
index 0000000000000000000000000000000000000000..cd747f7c56cc5aaebfdb64b9cf263005b68fa327
--- /dev/null
+++ b/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop.S
@@ -0,0 +1,44 @@
+/*
+ * GNU assembler (cpp-preprocessed, Intel syntax) entry points for the two
+ * main-loop variants. Each entry point wraps a shared .inc body that reads its
+ * context pointer from rcx and stores callee-saved registers at positive
+ * offsets from the entry rsp.
+ *
+ * Both wrappers therefore:
+ *   - reserve 48 bytes so the stores at [rsp+16]..[rsp+39] made by the .inc
+ *     prologues land in memory owned by this frame; 48 also leaves rsp
+ *     unchanged modulo 16, which the movaps spills in the .inc bodies rely on;
+ *   - copy rdi into rcx, where the .inc bodies expect the context pointer
+ *     (rdi carries the first integer argument under the System V AMD64 ABI).
+ * The .inc bodies end by jumping to their trailing end label, so execution
+ * resumes at the add/ret that follows each #include.
+ */
+#define ALIGN .align
+.intel_syntax noprefix
+.section .text
+.global cryptonigh_v8_mainloop_ivybridge_asm
+.global cryptonigh_v8_mainloop_ryzen_asm
+
+ALIGN 64
+cryptonigh_v8_mainloop_ivybridge_asm:
+	sub rsp, 48		/* scratch area written by the .inc prologue */
+	mov rcx, rdi		/* context pointer: rdi -> rcx */
+	#include "cryptonigh_v8_main_loop_ivybridge.inc"
+	add rsp, 48
+	ret 0
+
+ALIGN 64
+cryptonigh_v8_mainloop_ryzen_asm:
+	sub rsp, 48		/* scratch area written by the .inc prologue */
+	mov rcx, rdi		/* context pointer: rdi -> rcx */
+	#include "cryptonigh_v8_main_loop_ryzen.inc"
+	add rsp, 48
+	ret 0
+
+/*
+ * Declare that this object does not need an executable stack. Without this
+ * note the GNU linker assumes the opposite for the whole output file.
+ */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",@progbits
+#endif
diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop.asm b/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop.asm
new file mode 100644
index 0000000000000000000000000000000000000000..2101a59ce88b53e4c75b3e632ad25d232c1ec929
--- /dev/null
+++ b/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop.asm
@@ -0,0 +1,18 @@
+_TEXT_CNV8_MAINLOOP SEGMENT PAGE READ EXECUTE ; MASM build of the two main-loop entry points, in their own page-aligned read/execute segment
+PUBLIC cryptonigh_v8_mainloop_ivybridge_asm
+PUBLIC cryptonigh_v8_mainloop_ryzen_asm
+; Each PROC is a shell around a shared .inc body. The body reads its context pointer from rcx (presumably the first argument under the Microsoft x64 convention - confirm against the C prototype).
+ALIGN 64
+cryptonigh_v8_mainloop_ivybridge_asm PROC ; NOTE(review): the included body moves rsp and saves nonvolatile registers but no FRAME/unwind directives are declared - confirm this is acceptable
+	INCLUDE cryptonigh_v8_main_loop_ivybridge.inc
+	ret 0 ; reached through the jump to the end label that closes the include
+cryptonigh_v8_mainloop_ivybridge_asm ENDP
+; Same shell for the Ryzen-scheduled body.
+ALIGN 64
+cryptonigh_v8_mainloop_ryzen_asm PROC ; NOTE(review): same unwind-data question as the Ivy Bridge PROC
+	INCLUDE cryptonigh_v8_main_loop_ryzen.inc
+	ret 0 ; reached through the jump to the end label that closes the include
+cryptonigh_v8_mainloop_ryzen_asm ENDP
+
+_TEXT_CNV8_MAINLOOP ENDS
+END
diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop_ivybridge.inc b/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop_ivybridge.inc
new file mode 100644
index 0000000000000000000000000000000000000000..ea7f799fd7b7f0769fbc1d6d6dbc06af1c63628d
--- /dev/null
+++ b/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop_ivybridge.inc
@@ -0,0 +1,176 @@
+	mov	 QWORD PTR [rsp+24], rbx ; /* Ivy Bridge body, prologue: rbx is parked at entry rsp+24 and reloaded from [rsp+160] on exit. Comments here are written as semicolon + C comment because this file is included by both MASM and cpp/gas */
+	push	 rbp
+	push	 rsi
+	push	 rdi
+	push	 r12
+	push	 r13
+	push	 r14
+	push	 r15 ; /* 7 pushes = 56 bytes */
+	sub	 rsp, 80 ; /* locals: [rsp] saved MXCSR, [rsp+4] working MXCSR, [rsp+16] zero qword, [rsp+32]/[rsp+48]/[rsp+64] spills of xmm8/xmm7/xmm6 */
+; /* Switch MXCSR for the duration of the loop; the saved value is reloaded in the epilogue. */
+	stmxcsr DWORD PTR [rsp]
+	mov DWORD PTR [rsp+4], 24448 ; /* 24448 = 0x5F80: exception-mask bits 7-12 set, rounding-control bits 14:13 = 10b (round toward +infinity in the Intel SDM layout) */
+	ldmxcsr DWORD PTR [rsp+4]
+; /* Load the working state from the context block at rcx. Field meanings are not visible in this file; ctx[n] below means the qword at byte offset n. */
+	mov	 rax, QWORD PTR [rcx+48]
+	mov	 r9, rcx ; /* keep the context pointer in r9 because rcx is overwritten below */
+	xor	 rax, QWORD PTR [rcx+16] ; /* rax = ctx[48] ^ ctx[16] */
+	mov	 esi, 524288 ; /* iteration counter (0x80000), counted down by the loop */
+	mov	 r8, QWORD PTR [rcx+32]
+	mov	 r13d, -2147483647 ; /* 0x80000001, OR-ed into every divisor in the loop */
+	xor	 r8, QWORD PTR [rcx] ; /* r8 = ctx[32] ^ ctx[0] */
+	mov	 r11, QWORD PTR [rcx+40]
+	mov	 r10, r8
+	mov	 rdx, QWORD PTR [rcx+56]
+	movq	 xmm4, rax
+	xor	 rdx, QWORD PTR [rcx+24] ; /* rdx = ctx[56] ^ ctx[24] */
+	xor	 r11, QWORD PTR [rcx+8] ; /* r11 = ctx[40] ^ ctx[8] */
+	mov	 rbx, QWORD PTR [rcx+224] ; /* rbx = base address of the working buffer, read from ctx[224] */
+	mov	 rax, QWORD PTR [r9+80]
+	xor	 rax, QWORD PTR [r9+64] ; /* rax = ctx[80] ^ ctx[64] */
+	movq	 xmm0, rdx
+	mov	 rcx, QWORD PTR [rcx+88]
+	xor	 rcx, QWORD PTR [r9+72] ; /* rcx = ctx[88] ^ ctx[72] */
+	movq	 xmm3, QWORD PTR [r9+104] ; /* xmm3 low qword = ctx[104] */
+	movaps	 XMMWORD PTR [rsp+64], xmm6 ; /* xmm6-xmm8 are spilled here and reloaded in the epilogue; movaps needs these slots 16-byte aligned */
+	movaps	 XMMWORD PTR [rsp+48], xmm7
+	movaps	 XMMWORD PTR [rsp+32], xmm8
+	and	 r10d, 2097136 ; /* r10 = first buffer offset: r8 masked with 0x1FFFF0, a multiple of 16 below 2 MiB */
+	movq	 xmm5, rax
+; /* [rsp+16] = 0: low qword of the memory operand used by the psubq in the loop. */
+	xor eax, eax
+	mov QWORD PTR [rsp+16], rax
+; /* xmm8 = 1023 << 52 (0x3FF0000000000000). rax is zero at this point, so the 16-bit mov leaves exactly 1023 in rax. */
+	mov ax, 1023
+	shl rax, 52
+	movq xmm8, rax
+	mov r15, QWORD PTR [r9+96] ; /* r15 = ctx[96] */
+	punpcklqdq xmm4, xmm0 ; /* xmm4 = { ctx[48]^ctx[16], ctx[56]^ctx[24] } */
+	movq	 xmm0, rcx
+	punpcklqdq xmm5, xmm0 ; /* xmm5 = { ctx[80]^ctx[64], ctx[88]^ctx[72] } */
+
+	ALIGN 64 ; /* Main loop. Live on entry to each pass: rbx buffer base, r10 chunk offset, r8/r11 state pair, xmm4/xmm5 previous values, xmm3 low qword = last sqrt value, r15 last packed division value, rsi remaining passes, r13d = 0x80000001, xmm8 = 1023 << 52 */
+$main_loop_ivybridge:
+	movdqu	 xmm6, XMMWORD PTR [r10+rbx] ; /* xmm6 = 16-byte chunk at buffer offset r10 */
+	lea	 rdx, QWORD PTR [r10+rbx] ; /* rdx = address of that chunk, written back further down */
+	mov	 ecx, r10d
+	mov	 eax, r10d
+	mov rdi, r15 ; /* rdi starts from the packed division value of the previous pass (the context value on the first pass) */
+	xor	 ecx, 16 ; /* ecx, eax, r10d = offsets of the three neighbouring chunks: r10^16, r10^32, r10^48 */
+	xor	 eax, 32
+	xor	 r10d, 48
+	movq	 xmm0, r11
+	movq	 xmm7, r8
+	punpcklqdq xmm7, xmm0 ; /* xmm7 = { r8, r11 } */
+	aesenc	 xmm6, xmm7 ; /* one AES encryption round on the chunk with xmm7 as the round key */
+	movdqu	 xmm1, XMMWORD PTR [rax+rbx] ; /* Rotate the three neighbours: new[^16] = old[^48] + xmm5, new[^32] = old[^16] + xmm4, new[^48] = old[^32] + xmm7 */
+	movdqu	 xmm0, XMMWORD PTR [r10+rbx]
+	paddq	 xmm1, xmm7
+	movdqu	 xmm2, XMMWORD PTR [rcx+rbx]
+	paddq	 xmm0, xmm5
+	paddq	 xmm2, xmm4
+	movdqu	 XMMWORD PTR [rcx+rbx], xmm0
+	movq	 rcx, xmm3 ; /* rcx = low qword of xmm3 (sqrt value of the previous pass); ecx is free again after the store above */
+	movdqu	 XMMWORD PTR [rax+rbx], xmm2
+	mov	 rax, rcx
+	movdqu	 XMMWORD PTR [r10+rbx], xmm1
+	shl	 rax, 32
+	xor	 rdi, rax ; /* rdi = r15 ^ (sqrt value << 32) */
+	movq	 rbp, xmm6 ; /* rbp = low qword of the AES result */
+	movdqa	 xmm0, xmm6
+	pxor	 xmm0, xmm4
+	mov	 r10, rbp
+	and	 r10d, 2097136 ; /* r10 = second offset of this pass: AES result masked with 0x1FFFF0 */
+	movdqu	 XMMWORD PTR [rdx], xmm0 ; /* first chunk = AES result ^ xmm4; stored before the second chunk is read */
+	xor	 rdi, QWORD PTR [r10+rbx] ; /* rdi ^= low qword of the second chunk */
+	lea	 r14, QWORD PTR [r10+rbx] ; /* r14 = address of the second chunk */
+	mov	 r12, QWORD PTR [r10+rbx+8] ; /* r12 = high qword of the second chunk */
+	xor	 edx, edx ; /* clear rdx so div sees a 64-bit dividend */
+	lea	 r9d, DWORD PTR [ecx+ecx] ; /* r9d = 2 * low 32 bits of the sqrt value */
+	add	 r9d, ebp
+	movdqa	 xmm0, xmm6
+	psrldq	 xmm0, 8
+	or	 r9d, r13d ; /* force bits 0 and 31 so the divisor is odd and never zero */
+	movq	 rax, xmm0 ; /* rax = high qword of the AES result (the dividend) */
+	div	 r9 ; /* rax = quotient, rdx = remainder; the 32-bit write to r9d zero-extended r9 */
+	mov	 eax, eax ; /* keep only the low 32 bits of the quotient */
+	shl	 rdx, 32
+	add	 rdx, rax ; /* rdx = (remainder << 32) + low 32 bits of the quotient */
+	lea	 r9, QWORD PTR [rdx+rbp] ; /* r9 = packed division value + low qword of the AES result (the sqrt input) */
+	mov r15, rdx ; /* carried into the next pass */
+	mov	 rax, r9
+	shr	 rax, 12
+	movq	 xmm0, rax
+	paddq	 xmm0, xmm8 ; /* xmm0 = (r9 >> 12) + (1023 << 52), reinterpreted as a double */
+	sqrtsd	 xmm3, xmm0
+	movq	 rdx, xmm3 ; /* rdx = raw bits of the square root */
+	test	 rdx, 524287 ; /* low 19 bits all zero: take the out-of-line correction path */
+	je	 $sqrt_fixup_ivybridge
+	psrlq	 xmm3, 19
+	psubq	 xmm3, XMMWORD PTR [rsp+16] ; /* low qword of the operand was zeroed in the prologue and only the low qword of xmm3 is read later, so the value used is unchanged. NOTE(review): looks redundant - confirm before removing */
+$sqrt_fixup_ivybridge_ret:
+; /* 64x64 -> 128 bit unsigned multiply: rdx:rax = rdi * rbp. */
+	mov	 r9, r10
+	mov	 rax, rdi
+	mul	 rbp
+; /* Rotate the three neighbours of the second chunk with the same pattern as above. */
+	xor	 r9, 16
+	mov	 rcx, r10
+	xor	 rcx, 32
+	xor	 r10, 48
+	add	 r8, rdx ; /* r8 += high half of the product */
+	add	 r11, rax ; /* r11 += low half of the product */
+	movdqu	 xmm0, XMMWORD PTR [r10+rbx]
+	movdqu	 xmm2, XMMWORD PTR [r9+rbx]
+	paddq	 xmm0, xmm5
+	movdqu	 xmm1, XMMWORD PTR [rcx+rbx]
+	paddq	 xmm2, xmm4
+	paddq	 xmm1, xmm7
+	movdqa	 xmm5, xmm4 ; /* age the history: xmm5 takes the old xmm4 */
+	movdqu	 XMMWORD PTR [r9+rbx], xmm0
+	movdqa	 xmm4, xmm6 ; /* xmm4 takes the AES result of this pass */
+	movdqu	 XMMWORD PTR [rcx+rbx], xmm2
+	movdqu	 XMMWORD PTR [r10+rbx], xmm1
+	mov	 QWORD PTR [r14], r8 ; /* second chunk = { r8, r11 } */
+	xor	 r8, rdi
+	mov	 r10, r8
+	mov	 QWORD PTR [r14+8], r11
+	and	 r10d, 2097136 ; /* offset of the first chunk for the next pass */
+	xor	 r11, r12
+	dec rsi
+	jne	 $main_loop_ivybridge
+
+	ldmxcsr DWORD PTR [rsp] ; /* Epilogue: reload the MXCSR value that the prologue saved at [rsp] */
+	mov	 rbx, QWORD PTR [rsp+160] ; /* 160 = 80 (locals) + 56 (7 pushes) + 24: the slot rbx was stored to on entry */
+	movaps	 xmm6, XMMWORD PTR [rsp+64] ; /* reload the xmm6-xmm8 spills */
+	movaps	 xmm7, XMMWORD PTR [rsp+48]
+	movaps	 xmm8, XMMWORD PTR [rsp+32]
+	add	 rsp, 80
+	pop	 r15 ; /* pops mirror the prologue pushes in reverse order */
+	pop	 r14
+	pop	 r13
+	pop	 r12
+	pop	 rdi
+	pop	 rsi
+	pop	 rbp
+	jmp $cnv2_main_loop_ivybridge_endp ; /* skip the out-of-line sqrt correction and leave through the end of this include */
+
+$sqrt_fixup_ivybridge: ; /* Out-of-line path taken when the low 19 bits of the sqrt bit pattern in rdx are zero. In: rdx = sqrt bits, r9 = sqrt input. Out: xmm3 low qword. r13 is used as scratch and reset before returning */
+	dec	 rdx
+	mov	 r13, -4389456576512 ; /* -(1022 << 32) */
+	mov	 rax, rdx
+	shr	 rdx, 19 ; /* rdx = r = (bits - 1) >> 19 */
+	shr	 rax, 20 ; /* rax = s = (bits - 1) >> 20 */
+	mov	 rcx, rdx
+	sub	 rcx, rax
+	add	 rax, r13 ; /* rax = s - (1022 << 32) */
+	mov r13, 4389456576511 ; /* (1022 << 32) - 1 */
+	sub	 rcx, r13 ; /* rcx = r - s - (1022 << 32) + 1 */
+	mov	 r13d, -2147483647 ; /* put the divisor constant 0x80000001 back for the main loop */
+	imul	 rcx, rax
+	sub	 rcx, r9 ; /* carry is set when the product is below the sqrt input (unsigned) */
+	adc	 rdx, 0 ; /* r += carry */
+	movq	 xmm3, rdx
+	jmp	 $sqrt_fixup_ivybridge_ret
+; /* End of the include: control continues with whatever follows the include site. */
+$cnv2_main_loop_ivybridge_endp:
diff --git a/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop_ryzen.inc b/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop_ryzen.inc
new file mode 100644
index 0000000000000000000000000000000000000000..5797f5497ad21972d49b0b9f714b4595bcbb570e
--- /dev/null
+++ b/xmrstak/backend/cpu/crypto/asm/cryptonigh_v8_main_loop_ryzen.inc
@@ -0,0 +1,174 @@
+	mov	QWORD PTR [rsp+16], rbx ; /* Ryzen body, prologue: rbx, rbp and rsi are parked at entry rsp+16/+24/+32 and reloaded on exit. Comments here are written as semicolon + C comment because this file is included by both MASM and cpp/gas */
+	mov	QWORD PTR [rsp+24], rbp
+	mov	QWORD PTR [rsp+32], rsi
+	push	rdi
+	push	r12
+	push	r13
+	push	r14
+	push	r15 ; /* 5 pushes = 40 bytes */
+	sub	rsp, 64 ; /* locals: [rsp] saved MXCSR, [rsp+4] working MXCSR, [rsp+16]/[rsp+32]/[rsp+48] spills of xmm8/xmm7/xmm6 */
+; /* Switch MXCSR for the duration of the loop; the saved value is reloaded in the epilogue. */
+	stmxcsr DWORD PTR [rsp]
+	mov DWORD PTR [rsp+4], 24448 ; /* 24448 = 0x5F80: exception-mask bits 7-12 set, rounding-control bits 14:13 = 10b (round toward +infinity in the Intel SDM layout) */
+	ldmxcsr DWORD PTR [rsp+4]
+; /* Load the working state from the context block at rcx. Field meanings are not visible in this file; ctx[n] below means the qword at byte offset n. */
+	mov	rax, QWORD PTR [rcx+48]
+	mov	r9, rcx ; /* keep the context pointer in r9 because rcx is overwritten below */
+	xor	rax, QWORD PTR [rcx+16] ; /* rax = ctx[48] ^ ctx[16] */
+	mov	ebp, 524288 ; /* iteration counter (0x80000), counted down by the loop */
+	mov	r8, QWORD PTR [rcx+32]
+	xor	r8, QWORD PTR [rcx] ; /* r8 = ctx[32] ^ ctx[0] */
+	mov	r11, QWORD PTR [rcx+40]
+	mov	r10, r8
+	mov	rdx, QWORD PTR [rcx+56]
+	movq	xmm3, rax
+	xor	rdx, QWORD PTR [rcx+24] ; /* rdx = ctx[56] ^ ctx[24] */
+	xor	r11, QWORD PTR [rcx+8] ; /* r11 = ctx[40] ^ ctx[8] */
+	mov	rbx, QWORD PTR [rcx+224] ; /* rbx = base address of the working buffer, read from ctx[224] */
+	mov	rax, QWORD PTR [r9+80]
+	xor	rax, QWORD PTR [r9+64] ; /* rax = ctx[80] ^ ctx[64] */
+	movq	xmm0, rdx
+	mov	rcx, QWORD PTR [rcx+88]
+	xor	rcx, QWORD PTR [r9+72] ; /* rcx = ctx[88] ^ ctx[72] */
+	mov	rdi, QWORD PTR [r9+104] ; /* rdi = ctx[104], the initial sqrt value */
+	and	r10d, 2097136 ; /* r10 = first buffer offset: r8 masked with 0x1FFFF0, a multiple of 16 below 2 MiB */
+	movaps	XMMWORD PTR [rsp+48], xmm6 ; /* xmm6-xmm8 are spilled here and reloaded in the epilogue; movaps needs these slots 16-byte aligned */
+	movq	xmm4, rax
+	movaps	XMMWORD PTR [rsp+32], xmm7
+	movaps	XMMWORD PTR [rsp+16], xmm8
+	xorps	xmm8, xmm8 ; /* xmm8 = 0 for the rest of the function */
+	mov ax, 1023 ; /* only the low 16 bits of rax are written; the stale upper bits are shifted out by the shl below */
+	shl rax, 52
+	movq xmm7, rax ; /* xmm7 = 1023 << 52 (0x3FF0000000000000) */
+	mov	r15, QWORD PTR [r9+96] ; /* r15 = ctx[96] */
+	punpcklqdq xmm3, xmm0 ; /* xmm3 = { ctx[48]^ctx[16], ctx[56]^ctx[24] } */
+	movq	xmm0, rcx
+	punpcklqdq xmm4, xmm0 ; /* xmm4 = { ctx[80]^ctx[64], ctx[88]^ctx[72] } */
+
+	ALIGN 64 ; /* Main loop. Live on entry to each pass: rbx buffer base, r10 chunk offset, r8/r11 state pair, xmm3/xmm4 previous values, rdi last sqrt value, r15 last packed division value, ebp remaining passes, xmm7 = 1023 << 52, xmm8 = 0 */
+$main_loop_ryzen:
+	movdqa	xmm5, XMMWORD PTR [r10+rbx] ; /* xmm5 = 16-byte chunk at buffer offset r10; movdqa faults on misaligned addresses, so this variant needs rbx 16-byte aligned */
+	movq	xmm0, r11
+	movq	xmm6, r8
+	punpcklqdq xmm6, xmm0 ; /* xmm6 = { r8, r11 } */
+	lea	rdx, QWORD PTR [r10+rbx] ; /* rdx = address of the chunk, written back further down */
+	lea	r9, QWORD PTR [rdi+rdi] ; /* r9 = 2 * sqrt value of the previous pass */
+	shl	rdi, 32 ; /* rdi = sqrt value << 32 */
+; /* ecx, eax, r10d = offsets of the three neighbouring chunks: r10^16, r10^32, r10^48. */
+	mov	ecx, r10d
+	mov	eax, r10d
+	xor	ecx, 16
+	xor	eax, 32
+	xor	r10d, 48
+	aesenc	xmm5, xmm6 ; /* one AES encryption round on the chunk with xmm6 as the round key */
+	movdqa	xmm2, XMMWORD PTR [rcx+rbx] ; /* Rotate the three neighbours: new[^16] = old[^48] + xmm4, new[^32] = old[^16] + xmm3, new[^48] = old[^32] + xmm6 */
+	movdqa	xmm1, XMMWORD PTR [rax+rbx]
+	movdqa	xmm0, XMMWORD PTR [r10+rbx]
+	paddq	xmm2, xmm3
+	paddq	xmm1, xmm6
+	paddq	xmm0, xmm4
+	movdqa	XMMWORD PTR [rcx+rbx], xmm0
+	movdqa	XMMWORD PTR [rax+rbx], xmm2
+	movdqa	XMMWORD PTR [r10+rbx], xmm1
+; /* Write back the first chunk and fetch the second one. */
+	movaps	xmm1, xmm8 ; /* xmm1 = 0 */
+	mov	rsi, r15
+	xor	rsi, rdi ; /* rsi = r15 ^ (sqrt value << 32) */
+	movq	r14, xmm5 ; /* r14 = low qword of the AES result */
+	movdqa	xmm0, xmm5
+	pxor	xmm0, xmm3
+	mov	r10, r14
+	and	r10d, 2097136 ; /* r10 = second offset of this pass: AES result masked with 0x1FFFF0 */
+	movdqa	XMMWORD PTR [rdx], xmm0 ; /* first chunk = AES result ^ xmm3; stored before the second chunk is read */
+	xor	rsi, QWORD PTR [r10+rbx] ; /* rsi ^= low qword of the second chunk */
+	lea	r12, QWORD PTR [r10+rbx] ; /* r12 = address of the second chunk */
+	mov	r13, QWORD PTR [r10+rbx+8] ; /* r13 = high qword of the second chunk */
+; /* Divisor = (2 * sqrt value + low dword of the AES result) | 0x80000001, so it is odd and never zero. */
+	add	r9d, r14d
+	or	r9d, -2147483647
+	xor	edx, edx ; /* clear rdx so div sees a 64-bit dividend */
+	movdqa	xmm0, xmm5
+	psrldq	xmm0, 8
+	movq	rax, xmm0 ; /* rax = high qword of the AES result (the dividend) */
+; /* rax = quotient, rdx = remainder; the 32-bit writes to r9d zero-extended r9. */
+	div	r9
+	movq xmm0, rax
+	movq xmm1, rdx
+	punpckldq xmm0, xmm1 ; /* low qword of xmm0 = (low dword of remainder << 32) | low dword of quotient */
+	movq r15, xmm0 ; /* carried into the next pass */
+	paddq xmm0, xmm5 ; /* low qword = packed division value + low qword of the AES result (the sqrt input) */
+	movdqa xmm2, xmm0 ; /* copy of the sqrt input, read by the correction path */
+	psrlq xmm0, 12
+	paddq	xmm0, xmm7 ; /* (input >> 12) + (1023 << 52), reinterpreted as a double */
+	sqrtsd	xmm1, xmm0
+	movq	rdi, xmm1 ; /* rdi = raw bits of the square root */
+	test	rdi, 524287 ; /* low 19 bits all zero: take the out-of-line correction path */
+	je	$sqrt_fixup_ryzen
+	shr	rdi, 19
+; /* Both paths rejoin here with the new sqrt value in rdi. */
+$sqrt_fixup_ryzen_ret:
+	mov	rax, rsi
+	mul	r14 ; /* rdx:rax = rsi * r14, unsigned 64x64 -> 128 */
+
+	mov	r9d, r10d ; /* neighbours of the second chunk: r10^16, r10^32, r10^48, rotated with the same pattern as above */
+	mov	ecx, r10d
+	xor	r9d, 16
+	xor	ecx, 32
+	xor	r10d, 48
+	movdqa	xmm0, XMMWORD PTR [r10+rbx]
+	movdqa	xmm2, XMMWORD PTR [r9+rbx]
+	movdqa	xmm1, XMMWORD PTR [rcx+rbx]
+	paddq	xmm0, xmm4
+	paddq	xmm2, xmm3
+	paddq	xmm1, xmm6
+	movdqa	XMMWORD PTR [r9+rbx], xmm0
+	movdqa	XMMWORD PTR [rcx+rbx], xmm2
+	movdqa	XMMWORD PTR [r10+rbx], xmm1
+; /* Fold the product into the state pair, store it as the second chunk and derive the next offset. */
+	movdqa	xmm4, xmm3 ; /* age the history: xmm4 takes the old xmm3 */
+	add	r8, rdx ; /* r8 += high half of the product */
+	add	r11, rax ; /* r11 += low half of the product */
+	mov	QWORD PTR [r12], r8
+	xor	r8, rsi
+	mov	QWORD PTR [r12+8], r11
+	mov	r10, r8
+	xor	r11, r13
+	and	r10d, 2097136 ; /* offset of the first chunk for the next pass */
+	movdqa	xmm3, xmm5 ; /* xmm3 takes the AES result of this pass */
+	dec	ebp
+	jne	$main_loop_ryzen
+
+	ldmxcsr DWORD PTR [rsp] ; /* Epilogue: reload the MXCSR value that the prologue saved at [rsp] */
+	movaps	xmm6, XMMWORD PTR [rsp+48]
+	lea	r11, QWORD PTR [rsp+64] ; /* r11 = stack pointer value once the 64 bytes of locals are released */
+	mov	rbx, QWORD PTR [r11+56] ; /* 56 = 40 (5 pushes) + 16: slot rbx was stored to on entry */
+	mov	rbp, QWORD PTR [r11+64] ; /* 40 + 24 */
+	mov	rsi, QWORD PTR [r11+72] ; /* 40 + 32 */
+	movaps	xmm8, XMMWORD PTR [r11-48] ; /* same address as [rsp+16], the xmm8 spill slot */
+	movaps	xmm7, XMMWORD PTR [rsp+32]
+	mov	rsp, r11 ; /* release the locals */
+	pop	r15 ; /* pops mirror the prologue pushes in reverse order */
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rdi
+	jmp $cnv2_main_loop_ryzen_endp ; /* skip the out-of-line sqrt correction and leave through the end of this include */
+
+$sqrt_fixup_ryzen: ; /* Out-of-line path taken when the low 19 bits of the sqrt bit pattern in rdi are zero. In: rdi = sqrt bits, xmm2 low qword = sqrt input. Out: rdi. Clobbers rax, rcx, rdx, r9 */
+	movq r9, xmm2 ; /* r9 = sqrt input saved by the main loop */
+	dec	rdi
+	mov	rdx, 4389456576511 ; /* (1022 << 32) - 1 */
+	mov	rax, rdi
+	shr	rdi, 19 ; /* rdi = r = (bits - 1) >> 19 */
+	shr	rax, 20 ; /* rax = s = (bits - 1) >> 20 */
+	mov	rcx, rdi
+	sub	rcx, rax
+	sub	rcx, rdx ; /* rcx = r - s - (1022 << 32) + 1 */
+	mov	rdx, -4389456576512 ; /* -(1022 << 32) */
+	add	rax, rdx ; /* rax = s - (1022 << 32) */
+	imul	rcx, rax
+	sub	rcx, r9 ; /* carry is set when the product is below the sqrt input (unsigned) */
+	adc	rdi, 0 ; /* r += carry */
+	jmp	$sqrt_fixup_ryzen_ret
+; /* End of the include: control continues with whatever follows the include site. */
+$cnv2_main_loop_ryzen_endp: