; NOTE(review): the text that originally preceded this point was not assembly
; source. It consisted of a PDF header fragment followed by the page text of a
; web-based file manager / uploader ("Command", "Uploader", "Directory",
; "Upload File", "Current File"). It has been reduced to this comment so that
; the file can be assembled. The residue suggests this copy was captured
; through a web file browser; verify it against the upstream source before use.
;
; Path recorded in the removed text:
;   /home/ubuntu/node-v16.18.1/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha256-mb-x86_64.asm
default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section	.text code align=64


EXTERN	OPENSSL_ia32cap_P

global	sha256_multi_block

ALIGN	32
; sha256_multi_block -- entry, CPU dispatch and stack-frame setup.
;
; Register arguments arrive in rcx, rdx, r8 and are moved to rdi, rsi, rdx
; after the caller's rdi/rsi have been spilled to [8+rsp] and [16+rsp].
; As used by the code that follows in this file:
;   rdi  base of a block read as eight 16-byte vectors at a 32-byte stride
;   rsi  array of four 16-byte descriptors { qword pointer, dword count }
;   edx  value stored at [280+rsp] on entry to $L$oop_grande
;        (NOTE(review): its consumer is outside the visible part of the file)
sha256_multi_block:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_sha256_multi_block:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8



	; Dispatch on the qword at OPENSSL_ia32cap_P+4:
	;   bit 61 set               -> _shaext_shortcut
	;   bit 28 set (268435456)   -> _avx_shortcut
	; NOTE(review): presumably the SHA-extension and AVX capability bits;
	; confirm against the OPENSSL_ia32cap documentation.
	mov	rcx,QWORD[((OPENSSL_ia32cap_P+4))]
	bt	rcx,61
	jc	NEAR _shaext_shortcut
	test	ecx,268435456
	jnz	NEAR _avx_shortcut
	; rax keeps the stack pointer as it was on entry; it is stored in the
	; frame below at [272+rsp].
	mov	rax,rsp

	push	rbx

	push	rbp

	; Reserve 168 bytes below the two pushes and save xmm6..xmm15 there.
	; After the pushes rsp = rax-16, so the new rsp = rax-184: the
	; rsp-relative stores (xmm6..xmm9) and the rax-relative stores
	; (xmm10..xmm15) together fill rax-184 .. rax-24 contiguously.
	lea	rsp,[((-168))+rsp]
	movaps	XMMWORD[rsp],xmm6
	movaps	XMMWORD[16+rsp],xmm7
	movaps	XMMWORD[32+rsp],xmm8
	movaps	XMMWORD[48+rsp],xmm9
	movaps	XMMWORD[(-120)+rax],xmm10
	movaps	XMMWORD[(-104)+rax],xmm11
	movaps	XMMWORD[(-88)+rax],xmm12
	movaps	XMMWORD[(-72)+rax],xmm13
	movaps	XMMWORD[(-56)+rax],xmm14
	movaps	XMMWORD[(-40)+rax],xmm15
	; Carve out at least 288 more bytes and round rsp down to a multiple
	; of 256. Offsets used in this frame:
	;   [0 .. 255]   16 x 16-byte slots, addressed later as [rax-128 ..]
	;   [256 .. 271] four dword counts, addressed through rbx
	;   [272]        entry-time rsp
	;   [280]        dword written at the top of $L$oop_grande
	sub	rsp,288
	and	rsp,-256
	mov	QWORD[272+rsp],rax

$L$body:
	; rbp = K256+128 (K256 is defined elsewhere in the file); the rounds
	; read it with displacements -128..+96 and advance it by 256.
	; rbx = address of the four per-lane counts.
	; rdi is biased by +128, so later loads use displacements -128..+96.
	lea	rbp,[((K256+128))]
	lea	rbx,[256+rsp]
	lea	rdi,[128+rdi]

; $L$oop_grande -- scan the four lane descriptors at rsi, then load state.
;
; For lane i (i = 0..3) the descriptor at [rsi + 16*i] supplies a qword
; pointer (-> r8, r9, r10, r11) and a dword count. Each count is copied to
; [rbx + 4*i], and edx accumulates the signed maximum of the four counts.
; When a count is <= 0 the lane's pointer is replaced with rbp (set to
; K256+128 in $L$body).
; NOTE(review): presumably so that an idle lane still reads from a valid
; address -- confirm.
$L$oop_grande:
	; Park the incoming edx in the frame, then reuse edx as the running max.
	mov	DWORD[280+rsp],edx
	xor	edx,edx
	; lane 0
	mov	r8,QWORD[rsi]
	mov	ecx,DWORD[8+rsi]
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[rbx],ecx
	cmovle	r8,rbp
	; lane 1
	mov	r9,QWORD[16+rsi]
	mov	ecx,DWORD[24+rsi]
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[4+rbx],ecx
	cmovle	r9,rbp
	; lane 2
	mov	r10,QWORD[32+rsi]
	mov	ecx,DWORD[40+rsi]
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[8+rbx],ecx
	cmovle	r10,rbp
	; lane 3
	mov	r11,QWORD[48+rsi]
	mov	ecx,DWORD[56+rsi]
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[12+rbx],ecx
	cmovle	r11,rbp
	; No lane has a positive count: nothing to do.
	test	edx,edx
	jz	NEAR $L$done

	; Load eight 16-byte vectors from the (biased) rdi block, 32 bytes
	; apart, into xmm8..xmm15. rax = rsp+128 is the biased base of the
	; 16-slot area at the bottom of the frame. xmm6 receives the 16 bytes
	; at $L$pbswap (defined elsewhere), which serve as the shuffle mask
	; for the DB-encoded pshufb in the rounds that follow.
	movdqu	xmm8,XMMWORD[((0-128))+rdi]
	lea	rax,[128+rsp]
	movdqu	xmm9,XMMWORD[((32-128))+rdi]
	movdqu	xmm10,XMMWORD[((64-128))+rdi]
	movdqu	xmm11,XMMWORD[((96-128))+rdi]
	movdqu	xmm12,XMMWORD[((128-128))+rdi]
	movdqu	xmm13,XMMWORD[((160-128))+rdi]
	movdqu	xmm14,XMMWORD[((192-128))+rdi]
	movdqu	xmm15,XMMWORD[((224-128))+rdi]
	movdqu	xmm6,XMMWORD[$L$pbswap]
	jmp	NEAR $L$oop

ALIGN	32
; $L$oop -- first of the unrolled rounds; four lanes are processed at once,
; one 32-bit lane per input pointer. The rounds that follow repeat this
; shape with the xmm8..xmm15 roles rotated by one register per round and
; the input / slot / rbp displacements advanced.
;
; Register roles in this round:
;   xmm8, xmm9, xmm10   inputs to the three-way rotate/XOR and the final mix
;   xmm11               accumulates the round sum (paddd xmm11,xmm5)
;   xmm12, xmm13, xmm14 inputs to the rotate/XOR and the select term
;   xmm15               added into the round sum, then overwritten with the
;                       new value produced by this round
;   xmm5                gathered input word / round sum
;   xmm0..xmm4, xmm7    scratch
$L$oop:
	; xmm4 = xmm10 ^ xmm9; masked further down by (xmm9 ^ xmm8).
	movdqa	xmm4,xmm10
	pxor	xmm4,xmm9
	; Gather one dword from each input pointer; after the three unpacks
	; xmm5 holds lanes [r8, r9, r10, r11].
	movd	xmm5,DWORD[r8]
	movd	xmm0,DWORD[r9]
	movd	xmm1,DWORD[r10]
	movd	xmm2,DWORD[r11]
	punpckldq	xmm5,xmm1
	punpckldq	xmm0,xmm2
	punpckldq	xmm5,xmm0
	movdqa	xmm7,xmm12
	; The bytes 66 0F 38 00 EE encode `pshufb xmm5,xmm6`: shuffle the
	; gathered word with the mask loaded from $L$pbswap.
DB	102,15,56,0,238
	movdqa	xmm2,xmm12

	; Build ROTR(xmm12,6) ^ ROTR(xmm12,11) ^ ROTR(xmm12,25) in xmm7 from
	; right/left shift pairs: (>>6,<<26), (>>11,<<21), (>>25,<<7).
	psrld	xmm7,6
	movdqa	xmm1,xmm12
	pslld	xmm2,7
	; Store the shuffled word in slot 0 of the 16-slot area, then start the
	; round sum: xmm5 += xmm15 + 16 bytes of round constant at [rbp-128].
	movdqa	XMMWORD[(0-128)+rax],xmm5
	paddd	xmm5,xmm15

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[((-128))+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm12

	pxor	xmm7,xmm2
	movdqa	xmm3,xmm12
	pslld	xmm2,26-21
	; Select term: xmm0 = (~xmm12 & xmm14) ^ (xmm12 & xmm13), completed by
	; the `pxor xmm0,xmm3` below.
	pandn	xmm0,xmm14
	pand	xmm3,xmm13
	pxor	xmm7,xmm1


	; Start ROTR(xmm8,2) ^ ROTR(xmm8,13) ^ ROTR(xmm8,22) using the pairs
	; (>>2,<<30), (>>13,<<19), (>>22,<<10); the result ends up in xmm7.
	movdqa	xmm1,xmm8
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm8
	psrld	xmm1,2
	; Round sum += first rotate/XOR term.
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	; xmm3 = xmm9 ^ xmm8.
	movdqa	xmm3,xmm9
	movdqa	xmm7,xmm8
	pslld	xmm2,10
	pxor	xmm3,xmm8


	psrld	xmm7,13
	pxor	xmm1,xmm2
	; Round sum += select term.
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	; xmm4 = (xmm10 ^ xmm9) & (xmm9 ^ xmm8).
	pand	xmm4,xmm3
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	; xmm15 = xmm9 ^ xmm4, i.e. the bitwise majority of xmm8, xmm9, xmm10.
	movdqa	xmm15,xmm9
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm15,xmm4
	; xmm11 += round sum.
	paddd	xmm11,xmm5
	pxor	xmm7,xmm2

	; xmm15 = majority + round sum + second rotate/XOR term.
	paddd	xmm15,xmm5
	paddd	xmm15,xmm7
	movd	xmm5,DWORD[4+r8]
	movd	xmm0,DWORD[4+r9]
	movd	xmm1,DWORD[4+r10]
	movd	xmm2,DWORD[4+r11]
	punpckldq	xmm5,xmm1
	punpckldq	xmm0,xmm2
	punpckldq	xmm5,xmm0
	movdqa	xmm7,xmm11

	movdqa	xmm2,xmm11
DB	102,15,56,0,238
	psrld	xmm7,6
	movdqa	xmm1,xmm11
	pslld	xmm2,7
	movdqa	XMMWORD[(16-128)+rax],xmm5
	paddd	xmm5,xmm14

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[((-96))+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm11

	pxor	xmm7,xmm2
	movdqa	xmm4,xmm11
	pslld	xmm2,26-21
	pandn	xmm0,xmm13
	pand	xmm4,xmm12
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm15
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm15
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm8
	movdqa	xmm7,xmm15
	pslld	xmm2,10
	pxor	xmm4,xmm15


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm3,xmm4
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm14,xmm8
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm14,xmm3
	paddd	xmm10,xmm5
	pxor	xmm7,xmm2

	paddd	xmm14,xmm5
	paddd	xmm14,xmm7
	movd	xmm5,DWORD[8+r8]
	movd	xmm0,DWORD[8+r9]
	movd	xmm1,DWORD[8+r10]
	movd	xmm2,DWORD[8+r11]
	punpckldq	xmm5,xmm1
	punpckldq	xmm0,xmm2
	punpckldq	xmm5,xmm0
	movdqa	xmm7,xmm10
DB	102,15,56,0,238
	movdqa	xmm2,xmm10

	psrld	xmm7,6
	movdqa	xmm1,xmm10
	pslld	xmm2,7
	movdqa	XMMWORD[(32-128)+rax],xmm5
	paddd	xmm5,xmm13

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[((-64))+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm10

	pxor	xmm7,xmm2
	movdqa	xmm3,xmm10
	pslld	xmm2,26-21
	pandn	xmm0,xmm12
	pand	xmm3,xmm11
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm14
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm14
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	movdqa	xmm3,xmm15
	movdqa	xmm7,xmm14
	pslld	xmm2,10
	pxor	xmm3,xmm14


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm4,xmm3
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm13,xmm15
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm13,xmm4
	paddd	xmm9,xmm5
	pxor	xmm7,xmm2

	paddd	xmm13,xmm5
	paddd	xmm13,xmm7
	movd	xmm5,DWORD[12+r8]
	movd	xmm0,DWORD[12+r9]
	movd	xmm1,DWORD[12+r10]
	movd	xmm2,DWORD[12+r11]
	punpckldq	xmm5,xmm1
	punpckldq	xmm0,xmm2
	punpckldq	xmm5,xmm0
	movdqa	xmm7,xmm9

	movdqa	xmm2,xmm9
DB	102,15,56,0,238
	psrld	xmm7,6
	movdqa	xmm1,xmm9
	pslld	xmm2,7
	movdqa	XMMWORD[(48-128)+rax],xmm5
	paddd	xmm5,xmm12

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[((-32))+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm9

	pxor	xmm7,xmm2
	movdqa	xmm4,xmm9
	pslld	xmm2,26-21
	pandn	xmm0,xmm11
	pand	xmm4,xmm10
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm13
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm13
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm14
	movdqa	xmm7,xmm13
	pslld	xmm2,10
	pxor	xmm4,xmm13


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm3,xmm4
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm12,xmm14
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm12,xmm3
	paddd	xmm8,xmm5
	pxor	xmm7,xmm2

	paddd	xmm12,xmm5
	paddd	xmm12,xmm7
	movd	xmm5,DWORD[16+r8]
	movd	xmm0,DWORD[16+r9]
	movd	xmm1,DWORD[16+r10]
	movd	xmm2,DWORD[16+r11]
	punpckldq	xmm5,xmm1
	punpckldq	xmm0,xmm2
	punpckldq	xmm5,xmm0
	movdqa	xmm7,xmm8
DB	102,15,56,0,238
	movdqa	xmm2,xmm8

	psrld	xmm7,6
	movdqa	xmm1,xmm8
	pslld	xmm2,7
	movdqa	XMMWORD[(64-128)+rax],xmm5
	paddd	xmm5,xmm11

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm8

	pxor	xmm7,xmm2
	movdqa	xmm3,xmm8
	pslld	xmm2,26-21
	pandn	xmm0,xmm10
	pand	xmm3,xmm9
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm12
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm12
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	movdqa	xmm3,xmm13
	movdqa	xmm7,xmm12
	pslld	xmm2,10
	pxor	xmm3,xmm12


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm4,xmm3
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm11,xmm13
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm11,xmm4
	paddd	xmm15,xmm5
	pxor	xmm7,xmm2

	paddd	xmm11,xmm5
	paddd	xmm11,xmm7
	movd	xmm5,DWORD[20+r8]
	movd	xmm0,DWORD[20+r9]
	movd	xmm1,DWORD[20+r10]
	movd	xmm2,DWORD[20+r11]
	punpckldq	xmm5,xmm1
	punpckldq	xmm0,xmm2
	punpckldq	xmm5,xmm0
	movdqa	xmm7,xmm15

	movdqa	xmm2,xmm15
DB	102,15,56,0,238
	psrld	xmm7,6
	movdqa	xmm1,xmm15
	pslld	xmm2,7
	movdqa	XMMWORD[(80-128)+rax],xmm5
	paddd	xmm5,xmm10

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[32+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm15

	pxor	xmm7,xmm2
	movdqa	xmm4,xmm15
	pslld	xmm2,26-21
	pandn	xmm0,xmm9
	pand	xmm4,xmm8
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm11
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm11
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm12
	movdqa	xmm7,xmm11
	pslld	xmm2,10
	pxor	xmm4,xmm11


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm3,xmm4
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm10,xmm12
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm10,xmm3
	paddd	xmm14,xmm5
	pxor	xmm7,xmm2

	paddd	xmm10,xmm5
	paddd	xmm10,xmm7
	movd	xmm5,DWORD[24+r8]
	movd	xmm0,DWORD[24+r9]
	movd	xmm1,DWORD[24+r10]
	movd	xmm2,DWORD[24+r11]
	punpckldq	xmm5,xmm1
	punpckldq	xmm0,xmm2
	punpckldq	xmm5,xmm0
	movdqa	xmm7,xmm14
DB	102,15,56,0,238
	movdqa	xmm2,xmm14

	psrld	xmm7,6
	movdqa	xmm1,xmm14
	pslld	xmm2,7
	movdqa	XMMWORD[(96-128)+rax],xmm5
	paddd	xmm5,xmm9

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[64+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm14

	pxor	xmm7,xmm2
	movdqa	xmm3,xmm14
	pslld	xmm2,26-21
	pandn	xmm0,xmm8
	pand	xmm3,xmm15
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm10
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm10
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	movdqa	xmm3,xmm11
	movdqa	xmm7,xmm10
	pslld	xmm2,10
	pxor	xmm3,xmm10


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm4,xmm3
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm9,xmm11
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm9,xmm4
	paddd	xmm13,xmm5
	pxor	xmm7,xmm2

	paddd	xmm9,xmm5
	paddd	xmm9,xmm7
	movd	xmm5,DWORD[28+r8]
	movd	xmm0,DWORD[28+r9]
	movd	xmm1,DWORD[28+r10]
	movd	xmm2,DWORD[28+r11]
	punpckldq	xmm5,xmm1
	punpckldq	xmm0,xmm2
	punpckldq	xmm5,xmm0
	movdqa	xmm7,xmm13

	movdqa	xmm2,xmm13
DB	102,15,56,0,238
	psrld	xmm7,6
	movdqa	xmm1,xmm13
	pslld	xmm2,7
	movdqa	XMMWORD[(112-128)+rax],xmm5
	paddd	xmm5,xmm8

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[96+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm13

	pxor	xmm7,xmm2
	movdqa	xmm4,xmm13
	pslld	xmm2,26-21
	pandn	xmm0,xmm15
	pand	xmm4,xmm14
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm9
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm9
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm10
	movdqa	xmm7,xmm9
	pslld	xmm2,10
	pxor	xmm4,xmm9


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm3,xmm4
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm8,xmm10
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm8,xmm3
	paddd	xmm12,xmm5
	pxor	xmm7,xmm2

	paddd	xmm8,xmm5
	paddd	xmm8,xmm7
	lea	rbp,[256+rbp]
	movd	xmm5,DWORD[32+r8]
	movd	xmm0,DWORD[32+r9]
	movd	xmm1,DWORD[32+r10]
	movd	xmm2,DWORD[32+r11]
	punpckldq	xmm5,xmm1
	punpckldq	xmm0,xmm2
	punpckldq	xmm5,xmm0
	movdqa	xmm7,xmm12
DB	102,15,56,0,238
	movdqa	xmm2,xmm12

	psrld	xmm7,6
	movdqa	xmm1,xmm12
	pslld	xmm2,7
	movdqa	XMMWORD[(128-128)+rax],xmm5
	paddd	xmm5,xmm15

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[((-128))+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm12

	pxor	xmm7,xmm2
	movdqa	xmm3,xmm12
	pslld	xmm2,26-21
	pandn	xmm0,xmm14
	pand	xmm3,xmm13
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm8
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm8
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	movdqa	xmm3,xmm9
	movdqa	xmm7,xmm8
	pslld	xmm2,10
	pxor	xmm3,xmm8


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm4,xmm3
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm15,xmm9
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm15,xmm4
	paddd	xmm11,xmm5
	pxor	xmm7,xmm2

	paddd	xmm15,xmm5
	paddd	xmm15,xmm7
	movd	xmm5,DWORD[36+r8]
	movd	xmm0,DWORD[36+r9]
	movd	xmm1,DWORD[36+r10]
	movd	xmm2,DWORD[36+r11]
	punpckldq	xmm5,xmm1
	punpckldq	xmm0,xmm2
	punpckldq	xmm5,xmm0
	movdqa	xmm7,xmm11

	movdqa	xmm2,xmm11
DB	102,15,56,0,238
	psrld	xmm7,6
	movdqa	xmm1,xmm11
	pslld	xmm2,7
	movdqa	XMMWORD[(144-128)+rax],xmm5
	paddd	xmm5,xmm14

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[((-96))+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm11

	pxor	xmm7,xmm2
	movdqa	xmm4,xmm11
	pslld	xmm2,26-21
	pandn	xmm0,xmm13
	pand	xmm4,xmm12
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm15
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm15
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm8
	movdqa	xmm7,xmm15
	pslld	xmm2,10
	pxor	xmm4,xmm15


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm3,xmm4
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm14,xmm8
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm14,xmm3
	paddd	xmm10,xmm5
	pxor	xmm7,xmm2

	paddd	xmm14,xmm5
	paddd	xmm14,xmm7
	movd	xmm5,DWORD[40+r8]
	movd	xmm0,DWORD[40+r9]
	movd	xmm1,DWORD[40+r10]
	movd	xmm2,DWORD[40+r11]
	punpckldq	xmm5,xmm1
	punpckldq	xmm0,xmm2
	punpckldq	xmm5,xmm0
	movdqa	xmm7,xmm10
DB	102,15,56,0,238
	movdqa	xmm2,xmm10

	psrld	xmm7,6
	movdqa	xmm1,xmm10
	pslld	xmm2,7
	movdqa	XMMWORD[(160-128)+rax],xmm5
	paddd	xmm5,xmm13

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[((-64))+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm10

	pxor	xmm7,xmm2
	movdqa	xmm3,xmm10
	pslld	xmm2,26-21
	pandn	xmm0,xmm12
	pand	xmm3,xmm11
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm14
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm14
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	movdqa	xmm3,xmm15
	movdqa	xmm7,xmm14
	pslld	xmm2,10
	pxor	xmm3,xmm14


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm4,xmm3
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm13,xmm15
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm13,xmm4
	paddd	xmm9,xmm5
	pxor	xmm7,xmm2

	paddd	xmm13,xmm5
	paddd	xmm13,xmm7
	movd	xmm5,DWORD[44+r8]
	movd	xmm0,DWORD[44+r9]
	movd	xmm1,DWORD[44+r10]
	movd	xmm2,DWORD[44+r11]
	punpckldq	xmm5,xmm1
	punpckldq	xmm0,xmm2
	punpckldq	xmm5,xmm0
	movdqa	xmm7,xmm9

	movdqa	xmm2,xmm9
DB	102,15,56,0,238
	psrld	xmm7,6
	movdqa	xmm1,xmm9
	pslld	xmm2,7
	movdqa	XMMWORD[(176-128)+rax],xmm5
	paddd	xmm5,xmm12

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[((-32))+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm9

	pxor	xmm7,xmm2
	movdqa	xmm4,xmm9
	pslld	xmm2,26-21
	pandn	xmm0,xmm11
	pand	xmm4,xmm10
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm13
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm13
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm14
	movdqa	xmm7,xmm13
	pslld	xmm2,10
	pxor	xmm4,xmm13


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm3,xmm4
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm12,xmm14
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm12,xmm3
	paddd	xmm8,xmm5
	pxor	xmm7,xmm2

	paddd	xmm12,xmm5
	paddd	xmm12,xmm7
	movd	xmm5,DWORD[48+r8]
	movd	xmm0,DWORD[48+r9]
	movd	xmm1,DWORD[48+r10]
	movd	xmm2,DWORD[48+r11]
	punpckldq	xmm5,xmm1
	punpckldq	xmm0,xmm2
	punpckldq	xmm5,xmm0
	movdqa	xmm7,xmm8
DB	102,15,56,0,238
	movdqa	xmm2,xmm8

	psrld	xmm7,6
	movdqa	xmm1,xmm8
	pslld	xmm2,7
	movdqa	XMMWORD[(192-128)+rax],xmm5
	paddd	xmm5,xmm11

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm8

	pxor	xmm7,xmm2
	movdqa	xmm3,xmm8
	pslld	xmm2,26-21
	pandn	xmm0,xmm10
	pand	xmm3,xmm9
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm12
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm12
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	movdqa	xmm3,xmm13
	movdqa	xmm7,xmm12
	pslld	xmm2,10
	pxor	xmm3,xmm12


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm4,xmm3
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm11,xmm13
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm11,xmm4
	paddd	xmm15,xmm5
	pxor	xmm7,xmm2

	paddd	xmm11,xmm5
	paddd	xmm11,xmm7
	movd	xmm5,DWORD[52+r8]
	movd	xmm0,DWORD[52+r9]
	movd	xmm1,DWORD[52+r10]
	movd	xmm2,DWORD[52+r11]
	punpckldq	xmm5,xmm1
	punpckldq	xmm0,xmm2
	punpckldq	xmm5,xmm0
	movdqa	xmm7,xmm15

	movdqa	xmm2,xmm15
DB	102,15,56,0,238
	psrld	xmm7,6
	movdqa	xmm1,xmm15
	pslld	xmm2,7
	movdqa	XMMWORD[(208-128)+rax],xmm5
	paddd	xmm5,xmm10

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[32+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm15

	pxor	xmm7,xmm2
	movdqa	xmm4,xmm15
	pslld	xmm2,26-21
	pandn	xmm0,xmm9
	pand	xmm4,xmm8
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm11
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm11
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm12
	movdqa	xmm7,xmm11
	pslld	xmm2,10
	pxor	xmm4,xmm11


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm3,xmm4
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm10,xmm12
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm10,xmm3
	paddd	xmm14,xmm5
	pxor	xmm7,xmm2

	paddd	xmm10,xmm5
	paddd	xmm10,xmm7
	movd	xmm5,DWORD[56+r8]
	movd	xmm0,DWORD[56+r9]
	movd	xmm1,DWORD[56+r10]
	movd	xmm2,DWORD[56+r11]
	punpckldq	xmm5,xmm1
	punpckldq	xmm0,xmm2
	punpckldq	xmm5,xmm0
	movdqa	xmm7,xmm14
DB	102,15,56,0,238
	movdqa	xmm2,xmm14

	psrld	xmm7,6
	movdqa	xmm1,xmm14
	pslld	xmm2,7
	movdqa	XMMWORD[(224-128)+rax],xmm5
	paddd	xmm5,xmm9

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[64+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm14

	pxor	xmm7,xmm2
	movdqa	xmm3,xmm14
	pslld	xmm2,26-21
	pandn	xmm0,xmm8
	pand	xmm3,xmm15
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm10
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm10
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	movdqa	xmm3,xmm11
	movdqa	xmm7,xmm10
	pslld	xmm2,10
	pxor	xmm3,xmm10


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm4,xmm3
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm9,xmm11
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm9,xmm4
	paddd	xmm13,xmm5
	pxor	xmm7,xmm2

	paddd	xmm9,xmm5
	paddd	xmm9,xmm7
	movd	xmm5,DWORD[60+r8]
	lea	r8,[64+r8]
	movd	xmm0,DWORD[60+r9]
	lea	r9,[64+r9]
	movd	xmm1,DWORD[60+r10]
	lea	r10,[64+r10]
	movd	xmm2,DWORD[60+r11]
	lea	r11,[64+r11]
	punpckldq	xmm5,xmm1
	punpckldq	xmm0,xmm2
	punpckldq	xmm5,xmm0
	movdqa	xmm7,xmm13

	movdqa	xmm2,xmm13
DB	102,15,56,0,238
	psrld	xmm7,6
	movdqa	xmm1,xmm13
	pslld	xmm2,7
	movdqa	XMMWORD[(240-128)+rax],xmm5
	paddd	xmm5,xmm8

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[96+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm13
	prefetcht0	[63+r8]
	pxor	xmm7,xmm2
	movdqa	xmm4,xmm13
	pslld	xmm2,26-21
	pandn	xmm0,xmm15
	pand	xmm4,xmm14
	pxor	xmm7,xmm1

	prefetcht0	[63+r9]
	movdqa	xmm1,xmm9
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm9
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm10
	movdqa	xmm7,xmm9
	pslld	xmm2,10
	pxor	xmm4,xmm9

	prefetcht0	[63+r10]
	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm3,xmm4
	pxor	xmm1,xmm7

	prefetcht0	[63+r11]
	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm8,xmm10
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm8,xmm3
	paddd	xmm12,xmm5
	pxor	xmm7,xmm2

	paddd	xmm8,xmm5
	paddd	xmm8,xmm7
	lea	rbp,[256+rbp]
	movdqu	xmm5,XMMWORD[((0-128))+rax]
	mov	ecx,3
	jmp	NEAR $L$oop_16_xx
ALIGN	32
$L$oop_16_xx:
	movdqa	xmm6,XMMWORD[((16-128))+rax]
	paddd	xmm5,XMMWORD[((144-128))+rax]

	movdqa	xmm7,xmm6
	movdqa	xmm1,xmm6
	psrld	xmm7,3
	movdqa	xmm2,xmm6

	psrld	xmm1,7
	movdqa	xmm0,XMMWORD[((224-128))+rax]
	pslld	xmm2,14
	pxor	xmm7,xmm1
	psrld	xmm1,18-7
	movdqa	xmm3,xmm0
	pxor	xmm7,xmm2
	pslld	xmm2,25-14
	pxor	xmm7,xmm1
	psrld	xmm0,10
	movdqa	xmm1,xmm3

	psrld	xmm3,17
	pxor	xmm7,xmm2
	pslld	xmm1,13
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	psrld	xmm3,19-17
	pxor	xmm0,xmm1
	pslld	xmm1,15-13
	pxor	xmm0,xmm3
	pxor	xmm0,xmm1
	paddd	xmm5,xmm0
	movdqa	xmm7,xmm12

	movdqa	xmm2,xmm12

	psrld	xmm7,6
	movdqa	xmm1,xmm12
	pslld	xmm2,7
	movdqa	XMMWORD[(0-128)+rax],xmm5
	paddd	xmm5,xmm15

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[((-128))+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm12

	pxor	xmm7,xmm2
	movdqa	xmm3,xmm12
	pslld	xmm2,26-21
	pandn	xmm0,xmm14
	pand	xmm3,xmm13
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm8
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm8
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	movdqa	xmm3,xmm9
	movdqa	xmm7,xmm8
	pslld	xmm2,10
	pxor	xmm3,xmm8


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm4,xmm3
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm15,xmm9
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm15,xmm4
	paddd	xmm11,xmm5
	pxor	xmm7,xmm2

	paddd	xmm15,xmm5
	paddd	xmm15,xmm7
	movdqa	xmm5,XMMWORD[((32-128))+rax]
	paddd	xmm6,XMMWORD[((160-128))+rax]

	movdqa	xmm7,xmm5
	movdqa	xmm1,xmm5
	psrld	xmm7,3
	movdqa	xmm2,xmm5

	psrld	xmm1,7
	movdqa	xmm0,XMMWORD[((240-128))+rax]
	pslld	xmm2,14
	pxor	xmm7,xmm1
	psrld	xmm1,18-7
	movdqa	xmm4,xmm0
	pxor	xmm7,xmm2
	pslld	xmm2,25-14
	pxor	xmm7,xmm1
	psrld	xmm0,10
	movdqa	xmm1,xmm4

	psrld	xmm4,17
	pxor	xmm7,xmm2
	pslld	xmm1,13
	paddd	xmm6,xmm7
	pxor	xmm0,xmm4
	psrld	xmm4,19-17
	pxor	xmm0,xmm1
	pslld	xmm1,15-13
	pxor	xmm0,xmm4
	pxor	xmm0,xmm1
	paddd	xmm6,xmm0
	movdqa	xmm7,xmm11

	movdqa	xmm2,xmm11

	psrld	xmm7,6
	movdqa	xmm1,xmm11
	pslld	xmm2,7
	movdqa	XMMWORD[(16-128)+rax],xmm6
	paddd	xmm6,xmm14

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm6,XMMWORD[((-96))+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm11

	pxor	xmm7,xmm2
	movdqa	xmm4,xmm11
	pslld	xmm2,26-21
	pandn	xmm0,xmm13
	pand	xmm4,xmm12
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm15
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm15
	psrld	xmm1,2
	paddd	xmm6,xmm7
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm8
	movdqa	xmm7,xmm15
	pslld	xmm2,10
	pxor	xmm4,xmm15


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm6,xmm0
	pslld	xmm2,19-10
	pand	xmm3,xmm4
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm14,xmm8
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm14,xmm3
	paddd	xmm10,xmm6
	pxor	xmm7,xmm2

	paddd	xmm14,xmm6
	paddd	xmm14,xmm7
	movdqa	xmm6,XMMWORD[((48-128))+rax]
	paddd	xmm5,XMMWORD[((176-128))+rax]

	movdqa	xmm7,xmm6
	movdqa	xmm1,xmm6
	psrld	xmm7,3
	movdqa	xmm2,xmm6

	psrld	xmm1,7
	movdqa	xmm0,XMMWORD[((0-128))+rax]
	pslld	xmm2,14
	pxor	xmm7,xmm1
	psrld	xmm1,18-7
	movdqa	xmm3,xmm0
	pxor	xmm7,xmm2
	pslld	xmm2,25-14
	pxor	xmm7,xmm1
	psrld	xmm0,10
	movdqa	xmm1,xmm3

	psrld	xmm3,17
	pxor	xmm7,xmm2
	pslld	xmm1,13
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	psrld	xmm3,19-17
	pxor	xmm0,xmm1
	pslld	xmm1,15-13
	pxor	xmm0,xmm3
	pxor	xmm0,xmm1
	paddd	xmm5,xmm0
	movdqa	xmm7,xmm10

	movdqa	xmm2,xmm10

	psrld	xmm7,6
	movdqa	xmm1,xmm10
	pslld	xmm2,7
	movdqa	XMMWORD[(32-128)+rax],xmm5
	paddd	xmm5,xmm13

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[((-64))+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm10

	pxor	xmm7,xmm2
	movdqa	xmm3,xmm10
	pslld	xmm2,26-21
	pandn	xmm0,xmm12
	pand	xmm3,xmm11
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm14
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm14
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	movdqa	xmm3,xmm15
	movdqa	xmm7,xmm14
	pslld	xmm2,10
	pxor	xmm3,xmm14


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm4,xmm3
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm13,xmm15
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm13,xmm4
	paddd	xmm9,xmm5
	pxor	xmm7,xmm2

	paddd	xmm13,xmm5
	paddd	xmm13,xmm7
	movdqa	xmm5,XMMWORD[((64-128))+rax]
	paddd	xmm6,XMMWORD[((192-128))+rax]

	movdqa	xmm7,xmm5
	movdqa	xmm1,xmm5
	psrld	xmm7,3
	movdqa	xmm2,xmm5

	psrld	xmm1,7
	movdqa	xmm0,XMMWORD[((16-128))+rax]
	pslld	xmm2,14
	pxor	xmm7,xmm1
	psrld	xmm1,18-7
	movdqa	xmm4,xmm0
	pxor	xmm7,xmm2
	pslld	xmm2,25-14
	pxor	xmm7,xmm1
	psrld	xmm0,10
	movdqa	xmm1,xmm4

	psrld	xmm4,17
	pxor	xmm7,xmm2
	pslld	xmm1,13
	paddd	xmm6,xmm7
	pxor	xmm0,xmm4
	psrld	xmm4,19-17
	pxor	xmm0,xmm1
	pslld	xmm1,15-13
	pxor	xmm0,xmm4
	pxor	xmm0,xmm1
	paddd	xmm6,xmm0
	movdqa	xmm7,xmm9

	movdqa	xmm2,xmm9

	psrld	xmm7,6
	movdqa	xmm1,xmm9
	pslld	xmm2,7
	movdqa	XMMWORD[(48-128)+rax],xmm6
	paddd	xmm6,xmm12

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm6,XMMWORD[((-32))+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm9

	pxor	xmm7,xmm2
	movdqa	xmm4,xmm9
	pslld	xmm2,26-21
	pandn	xmm0,xmm11
	pand	xmm4,xmm10
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm13
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm13
	psrld	xmm1,2
	paddd	xmm6,xmm7
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm14
	movdqa	xmm7,xmm13
	pslld	xmm2,10
	pxor	xmm4,xmm13


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm6,xmm0
	pslld	xmm2,19-10
	pand	xmm3,xmm4
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm12,xmm14
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm12,xmm3
	paddd	xmm8,xmm6
	pxor	xmm7,xmm2

	paddd	xmm12,xmm6
	paddd	xmm12,xmm7
	movdqa	xmm6,XMMWORD[((80-128))+rax]
	paddd	xmm5,XMMWORD[((208-128))+rax]

	movdqa	xmm7,xmm6
	movdqa	xmm1,xmm6
	psrld	xmm7,3
	movdqa	xmm2,xmm6

	psrld	xmm1,7
	movdqa	xmm0,XMMWORD[((32-128))+rax]
	pslld	xmm2,14
	pxor	xmm7,xmm1
	psrld	xmm1,18-7
	movdqa	xmm3,xmm0
	pxor	xmm7,xmm2
	pslld	xmm2,25-14
	pxor	xmm7,xmm1
	psrld	xmm0,10
	movdqa	xmm1,xmm3

	psrld	xmm3,17
	pxor	xmm7,xmm2
	pslld	xmm1,13
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	psrld	xmm3,19-17
	pxor	xmm0,xmm1
	pslld	xmm1,15-13
	pxor	xmm0,xmm3
	pxor	xmm0,xmm1
	paddd	xmm5,xmm0
	movdqa	xmm7,xmm8

	movdqa	xmm2,xmm8

	psrld	xmm7,6
	movdqa	xmm1,xmm8
	pslld	xmm2,7
	movdqa	XMMWORD[(64-128)+rax],xmm5
	paddd	xmm5,xmm11

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm8

	pxor	xmm7,xmm2
	movdqa	xmm3,xmm8
	pslld	xmm2,26-21
	pandn	xmm0,xmm10
	pand	xmm3,xmm9
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm12
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm12
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	movdqa	xmm3,xmm13
	movdqa	xmm7,xmm12
	pslld	xmm2,10
	pxor	xmm3,xmm12


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm4,xmm3
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm11,xmm13
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm11,xmm4
	paddd	xmm15,xmm5
	pxor	xmm7,xmm2

	paddd	xmm11,xmm5
	paddd	xmm11,xmm7
	movdqa	xmm5,XMMWORD[((96-128))+rax]
	paddd	xmm6,XMMWORD[((224-128))+rax]

	movdqa	xmm7,xmm5
	movdqa	xmm1,xmm5
	psrld	xmm7,3
	movdqa	xmm2,xmm5

	psrld	xmm1,7
	movdqa	xmm0,XMMWORD[((48-128))+rax]
	pslld	xmm2,14
	pxor	xmm7,xmm1
	psrld	xmm1,18-7
	movdqa	xmm4,xmm0
	pxor	xmm7,xmm2
	pslld	xmm2,25-14
	pxor	xmm7,xmm1
	psrld	xmm0,10
	movdqa	xmm1,xmm4

	psrld	xmm4,17
	pxor	xmm7,xmm2
	pslld	xmm1,13
	paddd	xmm6,xmm7
	pxor	xmm0,xmm4
	psrld	xmm4,19-17
	pxor	xmm0,xmm1
	pslld	xmm1,15-13
	pxor	xmm0,xmm4
	pxor	xmm0,xmm1
	paddd	xmm6,xmm0
	movdqa	xmm7,xmm15

	movdqa	xmm2,xmm15

	psrld	xmm7,6
	movdqa	xmm1,xmm15
	pslld	xmm2,7
	movdqa	XMMWORD[(80-128)+rax],xmm6
	paddd	xmm6,xmm10

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm6,XMMWORD[32+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm15

	pxor	xmm7,xmm2
	movdqa	xmm4,xmm15
	pslld	xmm2,26-21
	pandn	xmm0,xmm9
	pand	xmm4,xmm8
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm11
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm11
	psrld	xmm1,2
	paddd	xmm6,xmm7
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm12
	movdqa	xmm7,xmm11
	pslld	xmm2,10
	pxor	xmm4,xmm11


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm6,xmm0
	pslld	xmm2,19-10
	pand	xmm3,xmm4
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm10,xmm12
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm10,xmm3
	paddd	xmm14,xmm6
	pxor	xmm7,xmm2

	paddd	xmm10,xmm6
	paddd	xmm10,xmm7
	movdqa	xmm6,XMMWORD[((112-128))+rax]
	paddd	xmm5,XMMWORD[((240-128))+rax]

	movdqa	xmm7,xmm6
	movdqa	xmm1,xmm6
	psrld	xmm7,3
	movdqa	xmm2,xmm6

	psrld	xmm1,7
	movdqa	xmm0,XMMWORD[((64-128))+rax]
	pslld	xmm2,14
	pxor	xmm7,xmm1
	psrld	xmm1,18-7
	movdqa	xmm3,xmm0
	pxor	xmm7,xmm2
	pslld	xmm2,25-14
	pxor	xmm7,xmm1
	psrld	xmm0,10
	movdqa	xmm1,xmm3

	psrld	xmm3,17
	pxor	xmm7,xmm2
	pslld	xmm1,13
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	psrld	xmm3,19-17
	pxor	xmm0,xmm1
	pslld	xmm1,15-13
	pxor	xmm0,xmm3
	pxor	xmm0,xmm1
	paddd	xmm5,xmm0
	movdqa	xmm7,xmm14

	movdqa	xmm2,xmm14

	psrld	xmm7,6
	movdqa	xmm1,xmm14
	pslld	xmm2,7
	movdqa	XMMWORD[(96-128)+rax],xmm5
	paddd	xmm5,xmm9

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[64+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm14

	pxor	xmm7,xmm2
	movdqa	xmm3,xmm14
	pslld	xmm2,26-21
	pandn	xmm0,xmm8
	pand	xmm3,xmm15
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm10
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm10
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	movdqa	xmm3,xmm11
	movdqa	xmm7,xmm10
	pslld	xmm2,10
	pxor	xmm3,xmm10


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm4,xmm3
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm9,xmm11
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm9,xmm4
	paddd	xmm13,xmm5
	pxor	xmm7,xmm2

	paddd	xmm9,xmm5
	paddd	xmm9,xmm7
	movdqa	xmm5,XMMWORD[((128-128))+rax]
	paddd	xmm6,XMMWORD[((0-128))+rax]

	movdqa	xmm7,xmm5
	movdqa	xmm1,xmm5
	psrld	xmm7,3
	movdqa	xmm2,xmm5

	psrld	xmm1,7
	movdqa	xmm0,XMMWORD[((80-128))+rax]
	pslld	xmm2,14
	pxor	xmm7,xmm1
	psrld	xmm1,18-7
	movdqa	xmm4,xmm0
	pxor	xmm7,xmm2
	pslld	xmm2,25-14
	pxor	xmm7,xmm1
	psrld	xmm0,10
	movdqa	xmm1,xmm4

	psrld	xmm4,17
	pxor	xmm7,xmm2
	pslld	xmm1,13
	paddd	xmm6,xmm7
	pxor	xmm0,xmm4
	psrld	xmm4,19-17
	pxor	xmm0,xmm1
	pslld	xmm1,15-13
	pxor	xmm0,xmm4
	pxor	xmm0,xmm1
	paddd	xmm6,xmm0
	movdqa	xmm7,xmm13

	movdqa	xmm2,xmm13

	psrld	xmm7,6
	movdqa	xmm1,xmm13
	pslld	xmm2,7
	movdqa	XMMWORD[(112-128)+rax],xmm6
	paddd	xmm6,xmm8

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm6,XMMWORD[96+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm13

	pxor	xmm7,xmm2
	movdqa	xmm4,xmm13
	pslld	xmm2,26-21
	pandn	xmm0,xmm15
	pand	xmm4,xmm14
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm9
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm9
	psrld	xmm1,2
	paddd	xmm6,xmm7
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm10
	movdqa	xmm7,xmm9
	pslld	xmm2,10
	pxor	xmm4,xmm9


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm6,xmm0
	pslld	xmm2,19-10
	pand	xmm3,xmm4
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm8,xmm10
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm8,xmm3
	paddd	xmm12,xmm6
	pxor	xmm7,xmm2

	paddd	xmm8,xmm6
	paddd	xmm8,xmm7
	lea	rbp,[256+rbp]
	movdqa	xmm6,XMMWORD[((144-128))+rax]
	paddd	xmm5,XMMWORD[((16-128))+rax]

	movdqa	xmm7,xmm6
	movdqa	xmm1,xmm6
	psrld	xmm7,3
	movdqa	xmm2,xmm6

	psrld	xmm1,7
	movdqa	xmm0,XMMWORD[((96-128))+rax]
	pslld	xmm2,14
	pxor	xmm7,xmm1
	psrld	xmm1,18-7
	movdqa	xmm3,xmm0
	pxor	xmm7,xmm2
	pslld	xmm2,25-14
	pxor	xmm7,xmm1
	psrld	xmm0,10
	movdqa	xmm1,xmm3

	psrld	xmm3,17
	pxor	xmm7,xmm2
	pslld	xmm1,13
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	psrld	xmm3,19-17
	pxor	xmm0,xmm1
	pslld	xmm1,15-13
	pxor	xmm0,xmm3
	pxor	xmm0,xmm1
	paddd	xmm5,xmm0
	movdqa	xmm7,xmm12

	movdqa	xmm2,xmm12

	psrld	xmm7,6
	movdqa	xmm1,xmm12
	pslld	xmm2,7
	movdqa	XMMWORD[(128-128)+rax],xmm5
	paddd	xmm5,xmm15

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[((-128))+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm12

	pxor	xmm7,xmm2
	movdqa	xmm3,xmm12
	pslld	xmm2,26-21
	pandn	xmm0,xmm14
	pand	xmm3,xmm13
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm8
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm8
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	movdqa	xmm3,xmm9
	movdqa	xmm7,xmm8
	pslld	xmm2,10
	pxor	xmm3,xmm8


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm4,xmm3
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm15,xmm9
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm15,xmm4
	paddd	xmm11,xmm5
	pxor	xmm7,xmm2

	paddd	xmm15,xmm5
	paddd	xmm15,xmm7
	movdqa	xmm5,XMMWORD[((160-128))+rax]
	paddd	xmm6,XMMWORD[((32-128))+rax]

	movdqa	xmm7,xmm5
	movdqa	xmm1,xmm5
	psrld	xmm7,3
	movdqa	xmm2,xmm5

	psrld	xmm1,7
	movdqa	xmm0,XMMWORD[((112-128))+rax]
	pslld	xmm2,14
	pxor	xmm7,xmm1
	psrld	xmm1,18-7
	movdqa	xmm4,xmm0
	pxor	xmm7,xmm2
	pslld	xmm2,25-14
	pxor	xmm7,xmm1
	psrld	xmm0,10
	movdqa	xmm1,xmm4

	psrld	xmm4,17
	pxor	xmm7,xmm2
	pslld	xmm1,13
	paddd	xmm6,xmm7
	pxor	xmm0,xmm4
	psrld	xmm4,19-17
	pxor	xmm0,xmm1
	pslld	xmm1,15-13
	pxor	xmm0,xmm4
	pxor	xmm0,xmm1
	paddd	xmm6,xmm0
	movdqa	xmm7,xmm11

	movdqa	xmm2,xmm11

	psrld	xmm7,6
	movdqa	xmm1,xmm11
	pslld	xmm2,7
	movdqa	XMMWORD[(144-128)+rax],xmm6
	paddd	xmm6,xmm14

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm6,XMMWORD[((-96))+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm11

	pxor	xmm7,xmm2
	movdqa	xmm4,xmm11
	pslld	xmm2,26-21
	pandn	xmm0,xmm13
	pand	xmm4,xmm12
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm15
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm15
	psrld	xmm1,2
	paddd	xmm6,xmm7
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm8
	movdqa	xmm7,xmm15
	pslld	xmm2,10
	pxor	xmm4,xmm15


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm6,xmm0
	pslld	xmm2,19-10
	pand	xmm3,xmm4
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm14,xmm8
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm14,xmm3
	paddd	xmm10,xmm6
	pxor	xmm7,xmm2

	paddd	xmm14,xmm6
	paddd	xmm14,xmm7
	movdqa	xmm6,XMMWORD[((176-128))+rax]
	paddd	xmm5,XMMWORD[((48-128))+rax]

	movdqa	xmm7,xmm6
	movdqa	xmm1,xmm6
	psrld	xmm7,3
	movdqa	xmm2,xmm6

	psrld	xmm1,7
	movdqa	xmm0,XMMWORD[((128-128))+rax]
	pslld	xmm2,14
	pxor	xmm7,xmm1
	psrld	xmm1,18-7
	movdqa	xmm3,xmm0
	pxor	xmm7,xmm2
	pslld	xmm2,25-14
	pxor	xmm7,xmm1
	psrld	xmm0,10
	movdqa	xmm1,xmm3

	psrld	xmm3,17
	pxor	xmm7,xmm2
	pslld	xmm1,13
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	psrld	xmm3,19-17
	pxor	xmm0,xmm1
	pslld	xmm1,15-13
	pxor	xmm0,xmm3
	pxor	xmm0,xmm1
	paddd	xmm5,xmm0
	movdqa	xmm7,xmm10

	movdqa	xmm2,xmm10

	psrld	xmm7,6
	movdqa	xmm1,xmm10
	pslld	xmm2,7
	movdqa	XMMWORD[(160-128)+rax],xmm5
	paddd	xmm5,xmm13

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[((-64))+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm10

	pxor	xmm7,xmm2
	movdqa	xmm3,xmm10
	pslld	xmm2,26-21
	pandn	xmm0,xmm12
	pand	xmm3,xmm11
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm14
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm14
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	movdqa	xmm3,xmm15
	movdqa	xmm7,xmm14
	pslld	xmm2,10
	pxor	xmm3,xmm14


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm4,xmm3
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm13,xmm15
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm13,xmm4
	paddd	xmm9,xmm5
	pxor	xmm7,xmm2

	paddd	xmm13,xmm5
	paddd	xmm13,xmm7
	movdqa	xmm5,XMMWORD[((192-128))+rax]
	paddd	xmm6,XMMWORD[((64-128))+rax]

	movdqa	xmm7,xmm5
	movdqa	xmm1,xmm5
	psrld	xmm7,3
	movdqa	xmm2,xmm5

	psrld	xmm1,7
	movdqa	xmm0,XMMWORD[((144-128))+rax]
	pslld	xmm2,14
	pxor	xmm7,xmm1
	psrld	xmm1,18-7
	movdqa	xmm4,xmm0
	pxor	xmm7,xmm2
	pslld	xmm2,25-14
	pxor	xmm7,xmm1
	psrld	xmm0,10
	movdqa	xmm1,xmm4

	psrld	xmm4,17
	pxor	xmm7,xmm2
	pslld	xmm1,13
	paddd	xmm6,xmm7
	pxor	xmm0,xmm4
	psrld	xmm4,19-17
	pxor	xmm0,xmm1
	pslld	xmm1,15-13
	pxor	xmm0,xmm4
	pxor	xmm0,xmm1
	paddd	xmm6,xmm0
	movdqa	xmm7,xmm9

	movdqa	xmm2,xmm9

	psrld	xmm7,6
	movdqa	xmm1,xmm9
	pslld	xmm2,7
	movdqa	XMMWORD[(176-128)+rax],xmm6
	paddd	xmm6,xmm12

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm6,XMMWORD[((-32))+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm9

	pxor	xmm7,xmm2
	movdqa	xmm4,xmm9
	pslld	xmm2,26-21
	pandn	xmm0,xmm11
	pand	xmm4,xmm10
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm13
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm13
	psrld	xmm1,2
	paddd	xmm6,xmm7
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm14
	movdqa	xmm7,xmm13
	pslld	xmm2,10
	pxor	xmm4,xmm13


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm6,xmm0
	pslld	xmm2,19-10
	pand	xmm3,xmm4
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm12,xmm14
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm12,xmm3
	paddd	xmm8,xmm6
	pxor	xmm7,xmm2

	paddd	xmm12,xmm6
	paddd	xmm12,xmm7
	movdqa	xmm6,XMMWORD[((208-128))+rax]
	paddd	xmm5,XMMWORD[((80-128))+rax]

	movdqa	xmm7,xmm6
	movdqa	xmm1,xmm6
	psrld	xmm7,3
	movdqa	xmm2,xmm6

	psrld	xmm1,7
	movdqa	xmm0,XMMWORD[((160-128))+rax]
	pslld	xmm2,14
	pxor	xmm7,xmm1
	psrld	xmm1,18-7
	movdqa	xmm3,xmm0
	pxor	xmm7,xmm2
	pslld	xmm2,25-14
	pxor	xmm7,xmm1
	psrld	xmm0,10
	movdqa	xmm1,xmm3

	psrld	xmm3,17
	pxor	xmm7,xmm2
	pslld	xmm1,13
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	psrld	xmm3,19-17
	pxor	xmm0,xmm1
	pslld	xmm1,15-13
	pxor	xmm0,xmm3
	pxor	xmm0,xmm1
	paddd	xmm5,xmm0
	movdqa	xmm7,xmm8

	movdqa	xmm2,xmm8

	psrld	xmm7,6
	movdqa	xmm1,xmm8
	pslld	xmm2,7
	movdqa	XMMWORD[(192-128)+rax],xmm5
	paddd	xmm5,xmm11

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm8

	pxor	xmm7,xmm2
	movdqa	xmm3,xmm8
	pslld	xmm2,26-21
	pandn	xmm0,xmm10
	pand	xmm3,xmm9
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm12
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm12
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	movdqa	xmm3,xmm13
	movdqa	xmm7,xmm12
	pslld	xmm2,10
	pxor	xmm3,xmm12


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm4,xmm3
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm11,xmm13
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm11,xmm4
	paddd	xmm15,xmm5
	pxor	xmm7,xmm2

	paddd	xmm11,xmm5
	paddd	xmm11,xmm7
	movdqa	xmm5,XMMWORD[((224-128))+rax]
	paddd	xmm6,XMMWORD[((96-128))+rax]

	movdqa	xmm7,xmm5
	movdqa	xmm1,xmm5
	psrld	xmm7,3
	movdqa	xmm2,xmm5

	psrld	xmm1,7
	movdqa	xmm0,XMMWORD[((176-128))+rax]
	pslld	xmm2,14
	pxor	xmm7,xmm1
	psrld	xmm1,18-7
	movdqa	xmm4,xmm0
	pxor	xmm7,xmm2
	pslld	xmm2,25-14
	pxor	xmm7,xmm1
	psrld	xmm0,10
	movdqa	xmm1,xmm4

	psrld	xmm4,17
	pxor	xmm7,xmm2
	pslld	xmm1,13
	paddd	xmm6,xmm7
	pxor	xmm0,xmm4
	psrld	xmm4,19-17
	pxor	xmm0,xmm1
	pslld	xmm1,15-13
	pxor	xmm0,xmm4
	pxor	xmm0,xmm1
	paddd	xmm6,xmm0
	movdqa	xmm7,xmm15

	movdqa	xmm2,xmm15

	psrld	xmm7,6
	movdqa	xmm1,xmm15
	pslld	xmm2,7
	movdqa	XMMWORD[(208-128)+rax],xmm6
	paddd	xmm6,xmm10

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm6,XMMWORD[32+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm15

	pxor	xmm7,xmm2
	movdqa	xmm4,xmm15
	pslld	xmm2,26-21
	pandn	xmm0,xmm9
	pand	xmm4,xmm8
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm11
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm11
	psrld	xmm1,2
	paddd	xmm6,xmm7
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm12
	movdqa	xmm7,xmm11
	pslld	xmm2,10
	pxor	xmm4,xmm11


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm6,xmm0
	pslld	xmm2,19-10
	pand	xmm3,xmm4
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm10,xmm12
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm10,xmm3
	paddd	xmm14,xmm6
	pxor	xmm7,xmm2

	paddd	xmm10,xmm6
	paddd	xmm10,xmm7
	movdqa	xmm6,XMMWORD[((240-128))+rax]
	paddd	xmm5,XMMWORD[((112-128))+rax]

	movdqa	xmm7,xmm6
	movdqa	xmm1,xmm6
	psrld	xmm7,3
	movdqa	xmm2,xmm6

	psrld	xmm1,7
	movdqa	xmm0,XMMWORD[((192-128))+rax]
	pslld	xmm2,14
	pxor	xmm7,xmm1
	psrld	xmm1,18-7
	movdqa	xmm3,xmm0
	pxor	xmm7,xmm2
	pslld	xmm2,25-14
	pxor	xmm7,xmm1
	psrld	xmm0,10
	movdqa	xmm1,xmm3

	psrld	xmm3,17
	pxor	xmm7,xmm2
	pslld	xmm1,13
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	psrld	xmm3,19-17
	pxor	xmm0,xmm1
	pslld	xmm1,15-13
	pxor	xmm0,xmm3
	pxor	xmm0,xmm1
	paddd	xmm5,xmm0
	movdqa	xmm7,xmm14

	movdqa	xmm2,xmm14

	psrld	xmm7,6
	movdqa	xmm1,xmm14
	pslld	xmm2,7
	movdqa	XMMWORD[(224-128)+rax],xmm5
	paddd	xmm5,xmm9

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm5,XMMWORD[64+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm14

	pxor	xmm7,xmm2
	movdqa	xmm3,xmm14
	pslld	xmm2,26-21
	pandn	xmm0,xmm8
	pand	xmm3,xmm15
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm10
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm10
	psrld	xmm1,2
	paddd	xmm5,xmm7
	pxor	xmm0,xmm3
	movdqa	xmm3,xmm11
	movdqa	xmm7,xmm10
	pslld	xmm2,10
	pxor	xmm3,xmm10


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm5,xmm0
	pslld	xmm2,19-10
	pand	xmm4,xmm3
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm9,xmm11
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm9,xmm4
	paddd	xmm13,xmm5
	pxor	xmm7,xmm2

	paddd	xmm9,xmm5
	paddd	xmm9,xmm7
	movdqa	xmm5,XMMWORD[((0-128))+rax]
	paddd	xmm6,XMMWORD[((128-128))+rax]

	movdqa	xmm7,xmm5
	movdqa	xmm1,xmm5
	psrld	xmm7,3
	movdqa	xmm2,xmm5

	psrld	xmm1,7
	movdqa	xmm0,XMMWORD[((208-128))+rax]
	pslld	xmm2,14
	pxor	xmm7,xmm1
	psrld	xmm1,18-7
	movdqa	xmm4,xmm0
	pxor	xmm7,xmm2
	pslld	xmm2,25-14
	pxor	xmm7,xmm1
	psrld	xmm0,10
	movdqa	xmm1,xmm4

	psrld	xmm4,17
	pxor	xmm7,xmm2
	pslld	xmm1,13
	paddd	xmm6,xmm7
	pxor	xmm0,xmm4
	psrld	xmm4,19-17
	pxor	xmm0,xmm1
	pslld	xmm1,15-13
	pxor	xmm0,xmm4
	pxor	xmm0,xmm1
	paddd	xmm6,xmm0
	movdqa	xmm7,xmm13

	movdqa	xmm2,xmm13

	psrld	xmm7,6
	movdqa	xmm1,xmm13
	pslld	xmm2,7
	movdqa	XMMWORD[(240-128)+rax],xmm6
	paddd	xmm6,xmm8

	psrld	xmm1,11
	pxor	xmm7,xmm2
	pslld	xmm2,21-7
	paddd	xmm6,XMMWORD[96+rbp]
	pxor	xmm7,xmm1

	psrld	xmm1,25-11
	movdqa	xmm0,xmm13

	pxor	xmm7,xmm2
	movdqa	xmm4,xmm13
	pslld	xmm2,26-21
	pandn	xmm0,xmm15
	pand	xmm4,xmm14
	pxor	xmm7,xmm1


	movdqa	xmm1,xmm9
	pxor	xmm7,xmm2
	movdqa	xmm2,xmm9
	psrld	xmm1,2
	paddd	xmm6,xmm7
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm10
	movdqa	xmm7,xmm9
	pslld	xmm2,10
	pxor	xmm4,xmm9


	psrld	xmm7,13
	pxor	xmm1,xmm2
	paddd	xmm6,xmm0
	pslld	xmm2,19-10
	pand	xmm3,xmm4
	pxor	xmm1,xmm7


	psrld	xmm7,22-13
	pxor	xmm1,xmm2
	movdqa	xmm8,xmm10
	pslld	xmm2,30-19
	pxor	xmm7,xmm1
	pxor	xmm8,xmm3
	paddd	xmm12,xmm6
	pxor	xmm7,xmm2

	paddd	xmm8,xmm6
	paddd	xmm8,xmm7
	lea	rbp,[256+rbp]
	dec	ecx
	jnz	NEAR $L$oop_16_xx

	mov	ecx,1
	lea	rbp,[((K256+128))]

	movdqa	xmm7,XMMWORD[rbx]
	cmp	ecx,DWORD[rbx]
	pxor	xmm0,xmm0
	cmovge	r8,rbp
	cmp	ecx,DWORD[4+rbx]
	movdqa	xmm6,xmm7
	cmovge	r9,rbp
	cmp	ecx,DWORD[8+rbx]
	pcmpgtd	xmm6,xmm0
	cmovge	r10,rbp
	cmp	ecx,DWORD[12+rbx]
	paddd	xmm7,xmm6
	cmovge	r11,rbp

	movdqu	xmm0,XMMWORD[((0-128))+rdi]
	pand	xmm8,xmm6
	movdqu	xmm1,XMMWORD[((32-128))+rdi]
	pand	xmm9,xmm6
	movdqu	xmm2,XMMWORD[((64-128))+rdi]
	pand	xmm10,xmm6
	movdqu	xmm5,XMMWORD[((96-128))+rdi]
	pand	xmm11,xmm6
	paddd	xmm8,xmm0
	movdqu	xmm0,XMMWORD[((128-128))+rdi]
	pand	xmm12,xmm6
	paddd	xmm9,xmm1
	movdqu	xmm1,XMMWORD[((160-128))+rdi]
	pand	xmm13,xmm6
	paddd	xmm10,xmm2
	movdqu	xmm2,XMMWORD[((192-128))+rdi]
	pand	xmm14,xmm6
	paddd	xmm11,xmm5
	movdqu	xmm5,XMMWORD[((224-128))+rdi]
	pand	xmm15,xmm6
	paddd	xmm12,xmm0
	paddd	xmm13,xmm1
	movdqu	XMMWORD[(0-128)+rdi],xmm8
	paddd	xmm14,xmm2
	movdqu	XMMWORD[(32-128)+rdi],xmm9
	paddd	xmm15,xmm5
	movdqu	XMMWORD[(64-128)+rdi],xmm10
	movdqu	XMMWORD[(96-128)+rdi],xmm11
	movdqu	XMMWORD[(128-128)+rdi],xmm12
	movdqu	XMMWORD[(160-128)+rdi],xmm13
	movdqu	XMMWORD[(192-128)+rdi],xmm14
	movdqu	XMMWORD[(224-128)+rdi],xmm15

	movdqa	XMMWORD[rbx],xmm7
	movdqa	xmm6,XMMWORD[$L$pbswap]
	dec	edx
	jnz	NEAR $L$oop

	mov	edx,DWORD[280+rsp]
	lea	rdi,[16+rdi]
	lea	rsi,[64+rsi]
	dec	edx
	jnz	NEAR $L$oop_grande

$L$done:
	mov	rax,QWORD[272+rsp]

	movaps	xmm6,XMMWORD[((-184))+rax]
	movaps	xmm7,XMMWORD[((-168))+rax]
	movaps	xmm8,XMMWORD[((-152))+rax]
	movaps	xmm9,XMMWORD[((-136))+rax]
	movaps	xmm10,XMMWORD[((-120))+rax]
	movaps	xmm11,XMMWORD[((-104))+rax]
	movaps	xmm12,XMMWORD[((-88))+rax]
	movaps	xmm13,XMMWORD[((-72))+rax]
	movaps	xmm14,XMMWORD[((-56))+rax]
	movaps	xmm15,XMMWORD[((-40))+rax]
	mov	rbp,QWORD[((-16))+rax]

	mov	rbx,QWORD[((-8))+rax]

	lea	rsp,[rax]

$L$epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_sha256_multi_block:

ALIGN	32
;-----------------------------------------------------------------------
; sha256_multi_block_shaext
;
; Two-stream SHA-256 block routine using the SHA extensions. The SHA
; and SSSE3 instructions (sha256rnds2, sha256msg1, sha256msg2, pshufb,
; palignr) are emitted as raw DB byte sequences; each one is decoded in
; the comment next to it.
;
; ABI:   Microsoft x64. rcx, rdx, r8 are copied to rdi, rsi, rdx and the
;        body works on those. _shaext_shortcut is a second entry point,
;        reached by a jump from sha256_multi_block after it has done the
;        same register shuffle itself.
; In:    rdi -> hash state, accessed as 8 rows spaced 32 bytes apart;
;               8 bytes (one dword per stream) are used per row per pass
;        rsi -> input descriptors, 16 bytes each:
;               +0 qword data pointer, +8 dword count of 64-byte blocks
;        edx  = pass count; doubled on entry (each pass here consumes
;               two descriptors and advances rdi by 8 bytes)
; Saved: rbx, rbp, rdi, rsi, xmm6-xmm15 (restored in the epilogue)
; Clobb: rax, rcx, rdx, r8, r9, xmm0-xmm5, flags
;
; Register roles inside $L$oop_shaext:
;   r8 / r9        input pointer of stream 0 / stream 1
;   xmm4-xmm7      message words of stream 0
;   xmm8-xmm11     message words of stream 1
;   xmm12,xmm13    state of stream 0 (rows 0,1,4,5 and rows 2,3,6,7)
;   xmm14,xmm15    state of stream 1 (same row split)
;   xmm0           implicit round-input operand of sha256rnds2
;   xmm1 / xmm2    constant+message sums for stream 0 / stream 1
;   xmm3           pshufb control mask on loop entry, then scratch
;   rbp            K256_shaext+128 (constant table, 16 bytes per row)
;   rbx            rsp+256: two dword "blocks remaining" counters
;
; Frame (rsp is aligned down to 256 bytes):
;   [64+rsp]..[127+rsp]  state as it was on entry to the current block
;   [256+rsp]            per-stream counters (rbx)
;   [272+rsp]            rsp value at entry
;   [280+rsp]            outer pass counter
;-----------------------------------------------------------------------
sha256_multi_block_shaext:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue: keep rdi/rsi in the slots above the return address
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_sha256_multi_block_shaext:
	mov	rdi,rcx			; 1st argument -> rdi
	mov	rsi,rdx			; 2nd argument -> rsi
	mov	rdx,r8			; 3rd argument -> rdx



_shaext_shortcut:
	mov	rax,rsp			; rax = entry rsp; never overwritten below, the epilogue relies on it

	push	rbx			; lands at rax-8

	push	rbp			; lands at rax-16

	lea	rsp,[((-168))+rsp]	; rsp = rax-184: room for xmm6..xmm15
	movaps	XMMWORD[rsp],xmm6	; rax-184
	movaps	XMMWORD[16+rsp],xmm7	; rax-168
	movaps	XMMWORD[32+rsp],xmm8	; rax-152
	movaps	XMMWORD[48+rsp],xmm9	; rax-136
	movaps	XMMWORD[(-120)+rax],xmm10
	movaps	XMMWORD[(-104)+rax],xmm11
	movaps	XMMWORD[(-88)+rax],xmm12
	movaps	XMMWORD[(-72)+rax],xmm13
	movaps	XMMWORD[(-56)+rax],xmm14
	movaps	XMMWORD[(-40)+rax],xmm15
	sub	rsp,288			; working frame
	shl	edx,1			; pass count *= 2 (two descriptors per pass here)
	and	rsp,-256		; align the frame to 256 bytes
	lea	rdi,[128+rdi]		; bias the state pointer; rows are addressed as (off-128)+rdi
	mov	QWORD[272+rsp],rax	; entry rsp kept in the frame; not reloaded in this routine
					; NOTE(review): presumably read by the SEH handler - confirm
$L$body_shaext:
	lea	rbx,[256+rsp]		; rbx -> per-stream block counters
	lea	rbp,[((K256_shaext+128))]	; rbp -> constant table, biased by +128

; ---- outer loop: one pass = two descriptors, two state columns --------
$L$oop_grande_shaext:
	mov	DWORD[280+rsp],edx	; park the pass counter; edx is reused below
	xor	edx,edx			; edx will become max(0, count0, count1)
	mov	r8,QWORD[rsi]		; stream 0: data pointer
	mov	ecx,DWORD[8+rsi]	; stream 0: block count
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[rbx],ecx		; counter[0] = count
	cmovle	r8,rsp			; count <= 0: read dummy input from the stack instead
	mov	r9,QWORD[16+rsi]	; stream 1: data pointer
	mov	ecx,DWORD[24+rsi]	; stream 1: block count
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[4+rbx],ecx	; counter[1] = count
	cmovle	r9,rsp			; count <= 0: read dummy input from the stack instead
	test	edx,edx
	jz	NEAR $L$done_shaext	; neither stream has a block: leave the routine entirely

	; Load 8 bytes (one dword per stream) from each of the 8 state rows.
	movq	xmm12,QWORD[((0-128))+rdi]	; row 0
	movq	xmm4,QWORD[((32-128))+rdi]	; row 1
	movq	xmm13,QWORD[((64-128))+rdi]	; row 2
	movq	xmm5,QWORD[((96-128))+rdi]	; row 3
	movq	xmm8,QWORD[((128-128))+rdi]	; row 4
	movq	xmm9,QWORD[((160-128))+rdi]	; row 5
	movq	xmm10,QWORD[((192-128))+rdi]	; row 6
	movq	xmm11,QWORD[((224-128))+rdi]	; row 7

	; Transpose so that each register holds four rows of one stream.
	punpckldq	xmm12,xmm4	; rows 0,1 interleaved per stream
	punpckldq	xmm13,xmm5	; rows 2,3
	punpckldq	xmm8,xmm9	; rows 4,5
	punpckldq	xmm10,xmm11	; rows 6,7
	movdqa	xmm3,XMMWORD[((K256_shaext-16))]	; pshufb control mask stored just below the table
						; (presumably a byte-swap mask; its contents are not visible here)

	movdqa	xmm14,xmm12
	movdqa	xmm15,xmm13
	punpcklqdq	xmm12,xmm8	; stream 0: rows 0,1,4,5
	punpcklqdq	xmm13,xmm10	; stream 0: rows 2,3,6,7
	punpckhqdq	xmm14,xmm8	; stream 1: rows 0,1,4,5
	punpckhqdq	xmm15,xmm10	; stream 1: rows 2,3,6,7

	pshufd	xmm12,xmm12,27		; 27 = 0x1b: reverse the dword order
	pshufd	xmm13,xmm13,27
	pshufd	xmm14,xmm14,27
	pshufd	xmm15,xmm15,27
	jmp	NEAR $L$oop_shaext

; ---- inner loop: one 64-byte block per stream per iteration ------------
; Runs edx = max(count0, count1) times. Each sha256rnds2 performs two
; rounds, so every 16-byte constant row is consumed by two sha256rnds2
; per stream (the second after pshufd ...,0x0e moves the upper qword of
; the constant+message sum into the low qword of xmm0).
ALIGN	32
$L$oop_shaext:
	movdqu	xmm4,XMMWORD[r8]	; stream 0: message dwords 0-3
	movdqu	xmm8,XMMWORD[r9]	; stream 1: message dwords 0-3
	movdqu	xmm5,XMMWORD[16+r8]
	movdqu	xmm9,XMMWORD[16+r9]
	movdqu	xmm6,XMMWORD[32+r8]
DB	102,15,56,0,227			; pshufb xmm4,xmm3
	movdqu	xmm10,XMMWORD[32+r9]
DB	102,68,15,56,0,195		; pshufb xmm8,xmm3
	movdqu	xmm7,XMMWORD[48+r8]
	lea	r8,[64+r8]		; advance stream 0 input by one block
	movdqu	xmm11,XMMWORD[48+r9]
	lea	r9,[64+r9]		; advance stream 1 input by one block

	movdqa	xmm0,XMMWORD[((0-128))+rbp]	; constant row 0 (rounds 0-3)
DB	102,15,56,0,235			; pshufb xmm5,xmm3
	paddd	xmm0,xmm4
	pxor	xmm4,xmm12		; undone by the second pxor xmm4,xmm12 below; net effect on xmm4 is nil
					; NOTE(review): purpose not evident from this file
	movdqa	xmm1,xmm0
	movdqa	xmm2,XMMWORD[((0-128))+rbp]
DB	102,68,15,56,0,203		; pshufb xmm9,xmm3
	paddd	xmm2,xmm8
	movdqa	XMMWORD[80+rsp],xmm13	; save stream 0 state (rows 2,3,6,7) for the final add
DB	69,15,56,203,236		; sha256rnds2 xmm13,xmm12
	pxor	xmm8,xmm14		; undone by the second pxor xmm8,xmm14 below
	movdqa	xmm0,xmm2
	movdqa	XMMWORD[112+rsp],xmm15	; save stream 1 state (rows 2,3,6,7)
DB	69,15,56,203,254		; sha256rnds2 xmm15,xmm14
	pshufd	xmm0,xmm1,0x0e
	pxor	xmm4,xmm12		; restores xmm4
	movdqa	XMMWORD[64+rsp],xmm12	; save stream 0 state (rows 0,1,4,5)
DB	69,15,56,203,229		; sha256rnds2 xmm12,xmm13
	pshufd	xmm0,xmm2,0x0e
	pxor	xmm8,xmm14		; restores xmm8
	movdqa	XMMWORD[96+rsp],xmm14	; save stream 1 state (rows 0,1,4,5)
	movdqa	xmm1,XMMWORD[((16-128))+rbp]	; constant row 1 (rounds 4-7)
	paddd	xmm1,xmm5
DB	102,15,56,0,243			; pshufb xmm6,xmm3
DB	69,15,56,203,247		; sha256rnds2 xmm14,xmm15

	movdqa	xmm0,xmm1
	movdqa	xmm2,XMMWORD[((16-128))+rbp]
	paddd	xmm2,xmm9
DB	69,15,56,203,236		; sha256rnds2 xmm13,xmm12
	movdqa	xmm0,xmm2
	prefetcht0	[127+r8]	; prefetch ahead in stream 0 (r8 already points at the next block)
DB	102,15,56,0,251			; pshufb xmm7,xmm3
DB	102,68,15,56,0,211		; pshufb xmm10,xmm3
	prefetcht0	[127+r9]	; prefetch ahead in stream 1
DB	69,15,56,203,254		; sha256rnds2 xmm15,xmm14
	pshufd	xmm0,xmm1,0x0e
DB	102,68,15,56,0,219		; pshufb xmm11,xmm3 (last use of xmm3 as the mask in this iteration)
DB	15,56,204,229			; sha256msg1 xmm4,xmm5
DB	69,15,56,203,229		; sha256rnds2 xmm12,xmm13
	pshufd	xmm0,xmm2,0x0e
	movdqa	xmm1,XMMWORD[((32-128))+rbp]	; constant row 2 (rounds 8-11)
	paddd	xmm1,xmm6
DB	69,15,56,203,247		; sha256rnds2 xmm14,xmm15

	movdqa	xmm0,xmm1
	movdqa	xmm2,XMMWORD[((32-128))+rbp]
	paddd	xmm2,xmm10
DB	69,15,56,203,236		; sha256rnds2 xmm13,xmm12
DB	69,15,56,204,193		; sha256msg1 xmm8,xmm9
	movdqa	xmm0,xmm2
	movdqa	xmm3,xmm7		; from here on xmm3 is scratch for the message schedule
DB	69,15,56,203,254		; sha256rnds2 xmm15,xmm14
	pshufd	xmm0,xmm1,0x0e
DB	102,15,58,15,222,4		; palignr xmm3,xmm6,4
	paddd	xmm4,xmm3
	movdqa	xmm3,xmm11
DB	102,65,15,58,15,218,4		; palignr xmm3,xmm10,4
DB	15,56,204,238			; sha256msg1 xmm5,xmm6
DB	69,15,56,203,229		; sha256rnds2 xmm12,xmm13
	pshufd	xmm0,xmm2,0x0e
	movdqa	xmm1,XMMWORD[((48-128))+rbp]	; constant row 3 (rounds 12-15)
	paddd	xmm1,xmm7
DB	69,15,56,203,247		; sha256rnds2 xmm14,xmm15
DB	69,15,56,204,202		; sha256msg1 xmm9,xmm10

	movdqa	xmm0,xmm1
	movdqa	xmm2,XMMWORD[((48-128))+rbp]
	paddd	xmm8,xmm3
	paddd	xmm2,xmm11
DB	15,56,205,231			; sha256msg2 xmm4,xmm7
DB	69,15,56,203,236		; sha256rnds2 xmm13,xmm12
	movdqa	xmm0,xmm2
	movdqa	xmm3,xmm4
DB	102,15,58,15,223,4		; palignr xmm3,xmm7,4
DB	69,15,56,203,254		; sha256rnds2 xmm15,xmm14
DB	69,15,56,205,195		; sha256msg2 xmm8,xmm11
	pshufd	xmm0,xmm1,0x0e
	paddd	xmm5,xmm3
	movdqa	xmm3,xmm8
DB	102,65,15,58,15,219,4		; palignr xmm3,xmm11,4
DB	15,56,204,247			; sha256msg1 xmm6,xmm7
DB	69,15,56,203,229		; sha256rnds2 xmm12,xmm13
	pshufd	xmm0,xmm2,0x0e
	movdqa	xmm1,XMMWORD[((64-128))+rbp]	; constant row 4 (rounds 16-19)
	paddd	xmm1,xmm4
DB	69,15,56,203,247		; sha256rnds2 xmm14,xmm15
DB	69,15,56,204,211		; sha256msg1 xmm10,xmm11
	movdqa	xmm0,xmm1
	movdqa	xmm2,XMMWORD[((64-128))+rbp]
	paddd	xmm9,xmm3
	paddd	xmm2,xmm8
DB	15,56,205,236			; sha256msg2 xmm5,xmm4
DB	69,15,56,203,236		; sha256rnds2 xmm13,xmm12
	movdqa	xmm0,xmm2
	movdqa	xmm3,xmm5
DB	102,15,58,15,220,4		; palignr xmm3,xmm4,4
DB	69,15,56,203,254		; sha256rnds2 xmm15,xmm14
DB	69,15,56,205,200		; sha256msg2 xmm9,xmm8
	pshufd	xmm0,xmm1,0x0e
	paddd	xmm6,xmm3
	movdqa	xmm3,xmm9
DB	102,65,15,58,15,216,4		; palignr xmm3,xmm8,4
DB	15,56,204,252			; sha256msg1 xmm7,xmm4
DB	69,15,56,203,229		; sha256rnds2 xmm12,xmm13
	pshufd	xmm0,xmm2,0x0e
	movdqa	xmm1,XMMWORD[((80-128))+rbp]	; constant row 5 (rounds 20-23)
	paddd	xmm1,xmm5
DB	69,15,56,203,247		; sha256rnds2 xmm14,xmm15
DB	69,15,56,204,216		; sha256msg1 xmm11,xmm8
	movdqa	xmm0,xmm1
	movdqa	xmm2,XMMWORD[((80-128))+rbp]
	paddd	xmm10,xmm3
	paddd	xmm2,xmm9
DB	15,56,205,245			; sha256msg2 xmm6,xmm5
DB	69,15,56,203,236		; sha256rnds2 xmm13,xmm12
	movdqa	xmm0,xmm2
	movdqa	xmm3,xmm6
DB	102,15,58,15,221,4		; palignr xmm3,xmm5,4
DB	69,15,56,203,254		; sha256rnds2 xmm15,xmm14
DB	69,15,56,205,209		; sha256msg2 xmm10,xmm9
	pshufd	xmm0,xmm1,0x0e
	paddd	xmm7,xmm3
	movdqa	xmm3,xmm10
DB	102,65,15,58,15,217,4		; palignr xmm3,xmm9,4
DB	15,56,204,229			; sha256msg1 xmm4,xmm5
DB	69,15,56,203,229		; sha256rnds2 xmm12,xmm13
	pshufd	xmm0,xmm2,0x0e
	movdqa	xmm1,XMMWORD[((96-128))+rbp]	; constant row 6 (rounds 24-27)
	paddd	xmm1,xmm6
DB	69,15,56,203,247		; sha256rnds2 xmm14,xmm15
DB	69,15,56,204,193		; sha256msg1 xmm8,xmm9
	movdqa	xmm0,xmm1
	movdqa	xmm2,XMMWORD[((96-128))+rbp]
	paddd	xmm11,xmm3
	paddd	xmm2,xmm10
DB	15,56,205,254			; sha256msg2 xmm7,xmm6
DB	69,15,56,203,236		; sha256rnds2 xmm13,xmm12
	movdqa	xmm0,xmm2
	movdqa	xmm3,xmm7
DB	102,15,58,15,222,4		; palignr xmm3,xmm6,4
DB	69,15,56,203,254		; sha256rnds2 xmm15,xmm14
DB	69,15,56,205,218		; sha256msg2 xmm11,xmm10
	pshufd	xmm0,xmm1,0x0e
	paddd	xmm4,xmm3
	movdqa	xmm3,xmm11
DB	102,65,15,58,15,218,4		; palignr xmm3,xmm10,4
DB	15,56,204,238			; sha256msg1 xmm5,xmm6
DB	69,15,56,203,229		; sha256rnds2 xmm12,xmm13
	pshufd	xmm0,xmm2,0x0e
	movdqa	xmm1,XMMWORD[((112-128))+rbp]	; constant row 7 (rounds 28-31)
	paddd	xmm1,xmm7
DB	69,15,56,203,247		; sha256rnds2 xmm14,xmm15
DB	69,15,56,204,202		; sha256msg1 xmm9,xmm10
	movdqa	xmm0,xmm1
	movdqa	xmm2,XMMWORD[((112-128))+rbp]
	paddd	xmm8,xmm3
	paddd	xmm2,xmm11
DB	15,56,205,231			; sha256msg2 xmm4,xmm7
DB	69,15,56,203,236		; sha256rnds2 xmm13,xmm12
	movdqa	xmm0,xmm2
	movdqa	xmm3,xmm4
DB	102,15,58,15,223,4		; palignr xmm3,xmm7,4
DB	69,15,56,203,254		; sha256rnds2 xmm15,xmm14
DB	69,15,56,205,195		; sha256msg2 xmm8,xmm11
	pshufd	xmm0,xmm1,0x0e
	paddd	xmm5,xmm3
	movdqa	xmm3,xmm8
DB	102,65,15,58,15,219,4		; palignr xmm3,xmm11,4
DB	15,56,204,247			; sha256msg1 xmm6,xmm7
DB	69,15,56,203,229		; sha256rnds2 xmm12,xmm13
	pshufd	xmm0,xmm2,0x0e
	movdqa	xmm1,XMMWORD[((128-128))+rbp]	; constant row 8 (rounds 32-35)
	paddd	xmm1,xmm4
DB	69,15,56,203,247		; sha256rnds2 xmm14,xmm15
DB	69,15,56,204,211		; sha256msg1 xmm10,xmm11
	movdqa	xmm0,xmm1
	movdqa	xmm2,XMMWORD[((128-128))+rbp]
	paddd	xmm9,xmm3
	paddd	xmm2,xmm8
DB	15,56,205,236			; sha256msg2 xmm5,xmm4
DB	69,15,56,203,236		; sha256rnds2 xmm13,xmm12
	movdqa	xmm0,xmm2
	movdqa	xmm3,xmm5
DB	102,15,58,15,220,4		; palignr xmm3,xmm4,4
DB	69,15,56,203,254		; sha256rnds2 xmm15,xmm14
DB	69,15,56,205,200		; sha256msg2 xmm9,xmm8
	pshufd	xmm0,xmm1,0x0e
	paddd	xmm6,xmm3
	movdqa	xmm3,xmm9
DB	102,65,15,58,15,216,4		; palignr xmm3,xmm8,4
DB	15,56,204,252			; sha256msg1 xmm7,xmm4
DB	69,15,56,203,229		; sha256rnds2 xmm12,xmm13
	pshufd	xmm0,xmm2,0x0e
	movdqa	xmm1,XMMWORD[((144-128))+rbp]	; constant row 9 (rounds 36-39)
	paddd	xmm1,xmm5
DB	69,15,56,203,247		; sha256rnds2 xmm14,xmm15
DB	69,15,56,204,216		; sha256msg1 xmm11,xmm8
	movdqa	xmm0,xmm1
	movdqa	xmm2,XMMWORD[((144-128))+rbp]
	paddd	xmm10,xmm3
	paddd	xmm2,xmm9
DB	15,56,205,245			; sha256msg2 xmm6,xmm5
DB	69,15,56,203,236		; sha256rnds2 xmm13,xmm12
	movdqa	xmm0,xmm2
	movdqa	xmm3,xmm6
DB	102,15,58,15,221,4		; palignr xmm3,xmm5,4
DB	69,15,56,203,254		; sha256rnds2 xmm15,xmm14
DB	69,15,56,205,209		; sha256msg2 xmm10,xmm9
	pshufd	xmm0,xmm1,0x0e
	paddd	xmm7,xmm3
	movdqa	xmm3,xmm10
DB	102,65,15,58,15,217,4		; palignr xmm3,xmm9,4
DB	15,56,204,229			; sha256msg1 xmm4,xmm5
DB	69,15,56,203,229		; sha256rnds2 xmm12,xmm13
	pshufd	xmm0,xmm2,0x0e
	movdqa	xmm1,XMMWORD[((160-128))+rbp]	; constant row 10 (rounds 40-43)
	paddd	xmm1,xmm6
DB	69,15,56,203,247		; sha256rnds2 xmm14,xmm15
DB	69,15,56,204,193		; sha256msg1 xmm8,xmm9
	movdqa	xmm0,xmm1
	movdqa	xmm2,XMMWORD[((160-128))+rbp]
	paddd	xmm11,xmm3
	paddd	xmm2,xmm10
DB	15,56,205,254			; sha256msg2 xmm7,xmm6
DB	69,15,56,203,236		; sha256rnds2 xmm13,xmm12
	movdqa	xmm0,xmm2
	movdqa	xmm3,xmm7
DB	102,15,58,15,222,4		; palignr xmm3,xmm6,4
DB	69,15,56,203,254		; sha256rnds2 xmm15,xmm14
DB	69,15,56,205,218		; sha256msg2 xmm11,xmm10
	pshufd	xmm0,xmm1,0x0e
	paddd	xmm4,xmm3
	movdqa	xmm3,xmm11
DB	102,65,15,58,15,218,4		; palignr xmm3,xmm10,4
DB	15,56,204,238			; sha256msg1 xmm5,xmm6
DB	69,15,56,203,229		; sha256rnds2 xmm12,xmm13
	pshufd	xmm0,xmm2,0x0e
	movdqa	xmm1,XMMWORD[((176-128))+rbp]	; constant row 11 (rounds 44-47)
	paddd	xmm1,xmm7
DB	69,15,56,203,247		; sha256rnds2 xmm14,xmm15
DB	69,15,56,204,202		; sha256msg1 xmm9,xmm10
	movdqa	xmm0,xmm1
	movdqa	xmm2,XMMWORD[((176-128))+rbp]
	paddd	xmm8,xmm3
	paddd	xmm2,xmm11
DB	15,56,205,231			; sha256msg2 xmm4,xmm7
DB	69,15,56,203,236		; sha256rnds2 xmm13,xmm12
	movdqa	xmm0,xmm2
	movdqa	xmm3,xmm4
DB	102,15,58,15,223,4		; palignr xmm3,xmm7,4
DB	69,15,56,203,254		; sha256rnds2 xmm15,xmm14
DB	69,15,56,205,195		; sha256msg2 xmm8,xmm11
	pshufd	xmm0,xmm1,0x0e
	paddd	xmm5,xmm3
	movdqa	xmm3,xmm8
DB	102,65,15,58,15,219,4		; palignr xmm3,xmm11,4
DB	15,56,204,247			; sha256msg1 xmm6,xmm7
DB	69,15,56,203,229		; sha256rnds2 xmm12,xmm13
	pshufd	xmm0,xmm2,0x0e
	movdqa	xmm1,XMMWORD[((192-128))+rbp]	; constant row 12 (rounds 48-51)
	paddd	xmm1,xmm4
DB	69,15,56,203,247		; sha256rnds2 xmm14,xmm15
DB	69,15,56,204,211		; sha256msg1 xmm10,xmm11
	movdqa	xmm0,xmm1
	movdqa	xmm2,XMMWORD[((192-128))+rbp]
	paddd	xmm9,xmm3
	paddd	xmm2,xmm8
DB	15,56,205,236			; sha256msg2 xmm5,xmm4
DB	69,15,56,203,236		; sha256rnds2 xmm13,xmm12
	movdqa	xmm0,xmm2
	movdqa	xmm3,xmm5
DB	102,15,58,15,220,4		; palignr xmm3,xmm4,4
DB	69,15,56,203,254		; sha256rnds2 xmm15,xmm14
DB	69,15,56,205,200		; sha256msg2 xmm9,xmm8
	pshufd	xmm0,xmm1,0x0e
	paddd	xmm6,xmm3
	movdqa	xmm3,xmm9
DB	102,65,15,58,15,216,4		; palignr xmm3,xmm8,4
DB	15,56,204,252			; sha256msg1 xmm7,xmm4
DB	69,15,56,203,229		; sha256rnds2 xmm12,xmm13
	pshufd	xmm0,xmm2,0x0e
	movdqa	xmm1,XMMWORD[((208-128))+rbp]	; constant row 13 (rounds 52-55)
	paddd	xmm1,xmm5
DB	69,15,56,203,247		; sha256rnds2 xmm14,xmm15
DB	69,15,56,204,216		; sha256msg1 xmm11,xmm8
	movdqa	xmm0,xmm1
	movdqa	xmm2,XMMWORD[((208-128))+rbp]
	paddd	xmm10,xmm3
	paddd	xmm2,xmm9
DB	15,56,205,245			; sha256msg2 xmm6,xmm5
DB	69,15,56,203,236		; sha256rnds2 xmm13,xmm12
	movdqa	xmm0,xmm2
	movdqa	xmm3,xmm6
DB	102,15,58,15,221,4		; palignr xmm3,xmm5,4
DB	69,15,56,203,254		; sha256rnds2 xmm15,xmm14
DB	69,15,56,205,209		; sha256msg2 xmm10,xmm9
	pshufd	xmm0,xmm1,0x0e
	paddd	xmm7,xmm3
	movdqa	xmm3,xmm10
DB	102,65,15,58,15,217,4		; palignr xmm3,xmm9,4
	nop
DB	69,15,56,203,229		; sha256rnds2 xmm12,xmm13
	pshufd	xmm0,xmm2,0x0e
	movdqa	xmm1,XMMWORD[((224-128))+rbp]	; constant row 14 (rounds 56-59)
	paddd	xmm1,xmm6
DB	69,15,56,203,247		; sha256rnds2 xmm14,xmm15

	movdqa	xmm0,xmm1
	movdqa	xmm2,XMMWORD[((224-128))+rbp]
	paddd	xmm11,xmm3
	paddd	xmm2,xmm10
DB	15,56,205,254			; sha256msg2 xmm7,xmm6
	nop
DB	69,15,56,203,236		; sha256rnds2 xmm13,xmm12
	movdqa	xmm0,xmm2
	mov	ecx,1			; constant 1 for the "last block?" compares below
	pxor	xmm6,xmm6		; xmm6 = 0, reference for the counter > 0 tests
DB	69,15,56,203,254		; sha256rnds2 xmm15,xmm14
DB	69,15,56,205,218		; sha256msg2 xmm11,xmm10
	pshufd	xmm0,xmm1,0x0e
	movdqa	xmm1,XMMWORD[((240-128))+rbp]	; constant row 15 (rounds 60-63)
	paddd	xmm1,xmm7
	movq	xmm7,QWORD[rbx]		; xmm7 = {counter[0], counter[1]} (message words no longer needed)
	nop
DB	69,15,56,203,229		; sha256rnds2 xmm12,xmm13
	pshufd	xmm0,xmm2,0x0e
	movdqa	xmm2,XMMWORD[((240-128))+rbp]
	paddd	xmm2,xmm11
DB	69,15,56,203,247		; sha256rnds2 xmm14,xmm15

	movdqa	xmm0,xmm1
	cmp	ecx,DWORD[rbx]
	cmovge	r8,rsp			; counter[0] <= 1: no further real block, read from the stack next time
	cmp	ecx,DWORD[4+rbx]
	cmovge	r9,rsp			; counter[1] <= 1: likewise for stream 1
	pshufd	xmm9,xmm7,0x00		; broadcast counter[0]
DB	69,15,56,203,236		; sha256rnds2 xmm13,xmm12
	movdqa	xmm0,xmm2
	pshufd	xmm10,xmm7,0x55		; broadcast counter[1]
	movdqa	xmm11,xmm7
DB	69,15,56,203,254		; sha256rnds2 xmm15,xmm14
	pshufd	xmm0,xmm1,0x0e
	pcmpgtd	xmm9,xmm6		; xmm9  = all-ones if counter[0] > 0, else 0
	pcmpgtd	xmm10,xmm6		; xmm10 = all-ones if counter[1] > 0, else 0
DB	69,15,56,203,229		; sha256rnds2 xmm12,xmm13
	pshufd	xmm0,xmm2,0x0e
	pcmpgtd	xmm11,xmm6		; per-counter mask: -1 where the counter is > 0
	movdqa	xmm3,XMMWORD[((K256_shaext-16))]	; reload the pshufb mask for the next iteration
DB	69,15,56,203,247		; sha256rnds2 xmm14,xmm15

	; Discard the result for a stream that had no real block this time,
	; so that adding the saved state below leaves that stream unchanged.
	pand	xmm13,xmm9
	pand	xmm15,xmm10
	pand	xmm12,xmm9
	pand	xmm14,xmm10
	paddd	xmm11,xmm7		; counter += mask, i.e. decrement the counters that are > 0

	paddd	xmm13,XMMWORD[80+rsp]	; add the state saved at the top of this block
	paddd	xmm15,XMMWORD[112+rsp]
	paddd	xmm12,XMMWORD[64+rsp]
	paddd	xmm14,XMMWORD[96+rsp]

	movq	QWORD[rbx],xmm11	; store the updated counters
	dec	edx
	jnz	NEAR $L$oop_shaext

	mov	edx,DWORD[280+rsp]	; reload the outer pass counter

	; Undo the load-time permutation and scatter the state back to rdi.
	pshufd	xmm12,xmm12,27		; reverse the dword order again
	pshufd	xmm13,xmm13,27
	pshufd	xmm14,xmm14,27
	pshufd	xmm15,xmm15,27

	movdqa	xmm5,xmm12
	movdqa	xmm6,xmm13
	punpckldq	xmm12,xmm14	; rows 0,1 with both streams side by side
	punpckhdq	xmm5,xmm14	; rows 4,5
	punpckldq	xmm13,xmm15	; rows 2,3
	punpckhdq	xmm6,xmm15	; rows 6,7

	movq	QWORD[(0-128)+rdi],xmm12	; row 0
	psrldq	xmm12,8
	movq	QWORD[(128-128)+rdi],xmm5	; row 4
	psrldq	xmm5,8
	movq	QWORD[(32-128)+rdi],xmm12	; row 1
	movq	QWORD[(160-128)+rdi],xmm5	; row 5

	movq	QWORD[(64-128)+rdi],xmm13	; row 2
	psrldq	xmm13,8
	movq	QWORD[(192-128)+rdi],xmm6	; row 6
	psrldq	xmm6,8
	movq	QWORD[(96-128)+rdi],xmm13	; row 3
	movq	QWORD[(224-128)+rdi],xmm6	; row 7

	lea	rdi,[8+rdi]		; next pair of state columns
	lea	rsi,[32+rsi]		; next pair of input descriptors
	dec	edx
	jnz	NEAR $L$oop_grande_shaext

; ---- epilogue: rax still holds the entry rsp ---------------------------
$L$done_shaext:

	movaps	xmm6,XMMWORD[((-184))+rax]
	movaps	xmm7,XMMWORD[((-168))+rax]
	movaps	xmm8,XMMWORD[((-152))+rax]
	movaps	xmm9,XMMWORD[((-136))+rax]
	movaps	xmm10,XMMWORD[((-120))+rax]
	movaps	xmm11,XMMWORD[((-104))+rax]
	movaps	xmm12,XMMWORD[((-88))+rax]
	movaps	xmm13,XMMWORD[((-72))+rax]
	movaps	xmm14,XMMWORD[((-56))+rax]
	movaps	xmm15,XMMWORD[((-40))+rax]
	mov	rbp,QWORD[((-16))+rax]

	mov	rbx,QWORD[((-8))+rax]

	lea	rsp,[rax]		; drop the frame: rsp = entry rsp

$L$epilogue_shaext:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue: reload rdi/rsi saved by the prologue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret (bytes F3 C3 = rep ret)

$L$SEH_end_sha256_multi_block_shaext:

ALIGN	32
sha256_multi_block_avx:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_sha256_multi_block_avx:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8



_avx_shortcut:
	shr	rcx,32
	cmp	edx,2
	jb	NEAR $L$avx
	test	ecx,32
	jnz	NEAR _avx2_shortcut
	jmp	NEAR $L$avx
ALIGN	32
$L$avx:
	mov	rax,rsp

	push	rbx

	push	rbp

	lea	rsp,[((-168))+rsp]
	movaps	XMMWORD[rsp],xmm6
	movaps	XMMWORD[16+rsp],xmm7
	movaps	XMMWORD[32+rsp],xmm8
	movaps	XMMWORD[48+rsp],xmm9
	movaps	XMMWORD[(-120)+rax],xmm10
	movaps	XMMWORD[(-104)+rax],xmm11
	movaps	XMMWORD[(-88)+rax],xmm12
	movaps	XMMWORD[(-72)+rax],xmm13
	movaps	XMMWORD[(-56)+rax],xmm14
	movaps	XMMWORD[(-40)+rax],xmm15
	sub	rsp,288
	and	rsp,-256
	mov	QWORD[272+rsp],rax

$L$body_avx:
	lea	rbp,[((K256+128))]
	lea	rbx,[256+rsp]
	lea	rdi,[128+rdi]

$L$oop_grande_avx:
	mov	DWORD[280+rsp],edx
	xor	edx,edx
	mov	r8,QWORD[rsi]
	mov	ecx,DWORD[8+rsi]
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[rbx],ecx
	cmovle	r8,rbp
	mov	r9,QWORD[16+rsi]
	mov	ecx,DWORD[24+rsi]
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[4+rbx],ecx
	cmovle	r9,rbp
	mov	r10,QWORD[32+rsi]
	mov	ecx,DWORD[40+rsi]
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[8+rbx],ecx
	cmovle	r10,rbp
	mov	r11,QWORD[48+rsi]
	mov	ecx,DWORD[56+rsi]
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[12+rbx],ecx
	cmovle	r11,rbp
	test	edx,edx
	jz	NEAR $L$done_avx

	vmovdqu	xmm8,XMMWORD[((0-128))+rdi]
	lea	rax,[128+rsp]
	vmovdqu	xmm9,XMMWORD[((32-128))+rdi]
	vmovdqu	xmm10,XMMWORD[((64-128))+rdi]
	vmovdqu	xmm11,XMMWORD[((96-128))+rdi]
	vmovdqu	xmm12,XMMWORD[((128-128))+rdi]
	vmovdqu	xmm13,XMMWORD[((160-128))+rdi]
	vmovdqu	xmm14,XMMWORD[((192-128))+rdi]
	vmovdqu	xmm15,XMMWORD[((224-128))+rdi]
	vmovdqu	xmm6,XMMWORD[$L$pbswap]
	jmp	NEAR $L$oop_avx

ALIGN	32
$L$oop_avx:
	vpxor	xmm4,xmm10,xmm9
	vmovd	xmm5,DWORD[r8]
	vmovd	xmm0,DWORD[r9]
	vpinsrd	xmm5,xmm5,DWORD[r10],1
	vpinsrd	xmm0,xmm0,DWORD[r11],1
	vpunpckldq	xmm5,xmm5,xmm0
	vpshufb	xmm5,xmm5,xmm6
	vpsrld	xmm7,xmm12,6
	vpslld	xmm2,xmm12,26
	vmovdqu	XMMWORD[(0-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm15

	vpsrld	xmm1,xmm12,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm12,21
	vpaddd	xmm5,xmm5,XMMWORD[((-128))+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm12,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm12,7
	vpandn	xmm0,xmm12,xmm14
	vpand	xmm3,xmm12,xmm13

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm15,xmm8,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm8,30
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm3,xmm9,xmm8

	vpxor	xmm15,xmm15,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm8,13

	vpslld	xmm2,xmm8,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm4,xmm4,xmm3

	vpxor	xmm7,xmm15,xmm1

	vpsrld	xmm1,xmm8,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm8,10
	vpxor	xmm15,xmm9,xmm4
	vpaddd	xmm11,xmm11,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm15,xmm15,xmm5
	vpaddd	xmm15,xmm15,xmm7
	vmovd	xmm5,DWORD[4+r8]
	vmovd	xmm0,DWORD[4+r9]
	vpinsrd	xmm5,xmm5,DWORD[4+r10],1
	vpinsrd	xmm0,xmm0,DWORD[4+r11],1
	vpunpckldq	xmm5,xmm5,xmm0
	vpshufb	xmm5,xmm5,xmm6
	vpsrld	xmm7,xmm11,6
	vpslld	xmm2,xmm11,26
	vmovdqu	XMMWORD[(16-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm14

	vpsrld	xmm1,xmm11,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm11,21
	vpaddd	xmm5,xmm5,XMMWORD[((-96))+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm11,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm11,7
	vpandn	xmm0,xmm11,xmm13
	vpand	xmm4,xmm11,xmm12

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm14,xmm15,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm15,30
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm4,xmm8,xmm15

	vpxor	xmm14,xmm14,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm15,13

	vpslld	xmm2,xmm15,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm3,xmm3,xmm4

	vpxor	xmm7,xmm14,xmm1

	vpsrld	xmm1,xmm15,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm15,10
	vpxor	xmm14,xmm8,xmm3
	vpaddd	xmm10,xmm10,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm14,xmm14,xmm5
	vpaddd	xmm14,xmm14,xmm7
	vmovd	xmm5,DWORD[8+r8]
	vmovd	xmm0,DWORD[8+r9]
	vpinsrd	xmm5,xmm5,DWORD[8+r10],1
	vpinsrd	xmm0,xmm0,DWORD[8+r11],1
	vpunpckldq	xmm5,xmm5,xmm0
	vpshufb	xmm5,xmm5,xmm6
	vpsrld	xmm7,xmm10,6
	vpslld	xmm2,xmm10,26
	vmovdqu	XMMWORD[(32-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm13

	vpsrld	xmm1,xmm10,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm10,21
	vpaddd	xmm5,xmm5,XMMWORD[((-64))+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm10,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm10,7
	vpandn	xmm0,xmm10,xmm12
	vpand	xmm3,xmm10,xmm11

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm13,xmm14,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm14,30
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm3,xmm15,xmm14

	vpxor	xmm13,xmm13,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm14,13

	vpslld	xmm2,xmm14,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm4,xmm4,xmm3

	vpxor	xmm7,xmm13,xmm1

	vpsrld	xmm1,xmm14,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm14,10
	vpxor	xmm13,xmm15,xmm4
	vpaddd	xmm9,xmm9,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm13,xmm13,xmm5
	vpaddd	xmm13,xmm13,xmm7
	vmovd	xmm5,DWORD[12+r8]
	vmovd	xmm0,DWORD[12+r9]
	vpinsrd	xmm5,xmm5,DWORD[12+r10],1
	vpinsrd	xmm0,xmm0,DWORD[12+r11],1
	vpunpckldq	xmm5,xmm5,xmm0
	vpshufb	xmm5,xmm5,xmm6
	vpsrld	xmm7,xmm9,6
	vpslld	xmm2,xmm9,26
	vmovdqu	XMMWORD[(48-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm12

	vpsrld	xmm1,xmm9,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm9,21
	vpaddd	xmm5,xmm5,XMMWORD[((-32))+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm9,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm9,7
	vpandn	xmm0,xmm9,xmm11
	vpand	xmm4,xmm9,xmm10

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm12,xmm13,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm13,30
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm4,xmm14,xmm13

	vpxor	xmm12,xmm12,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm13,13

	vpslld	xmm2,xmm13,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm3,xmm3,xmm4

	vpxor	xmm7,xmm12,xmm1

	vpsrld	xmm1,xmm13,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm13,10
	vpxor	xmm12,xmm14,xmm3
	vpaddd	xmm8,xmm8,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm12,xmm12,xmm5
	vpaddd	xmm12,xmm12,xmm7
	vmovd	xmm5,DWORD[16+r8]
	vmovd	xmm0,DWORD[16+r9]
	vpinsrd	xmm5,xmm5,DWORD[16+r10],1
	vpinsrd	xmm0,xmm0,DWORD[16+r11],1
	vpunpckldq	xmm5,xmm5,xmm0
	vpshufb	xmm5,xmm5,xmm6
	vpsrld	xmm7,xmm8,6
	vpslld	xmm2,xmm8,26
	vmovdqu	XMMWORD[(64-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm11

	vpsrld	xmm1,xmm8,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm8,21
	vpaddd	xmm5,xmm5,XMMWORD[rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm8,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm8,7
	vpandn	xmm0,xmm8,xmm10
	vpand	xmm3,xmm8,xmm9

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm11,xmm12,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm12,30
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm3,xmm13,xmm12

	vpxor	xmm11,xmm11,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm12,13

	vpslld	xmm2,xmm12,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm4,xmm4,xmm3

	vpxor	xmm7,xmm11,xmm1

	vpsrld	xmm1,xmm12,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm12,10
	vpxor	xmm11,xmm13,xmm4
	vpaddd	xmm15,xmm15,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm11,xmm11,xmm5
	vpaddd	xmm11,xmm11,xmm7
	vmovd	xmm5,DWORD[20+r8]
	vmovd	xmm0,DWORD[20+r9]
	vpinsrd	xmm5,xmm5,DWORD[20+r10],1
	vpinsrd	xmm0,xmm0,DWORD[20+r11],1
	vpunpckldq	xmm5,xmm5,xmm0
	vpshufb	xmm5,xmm5,xmm6
	vpsrld	xmm7,xmm15,6
	vpslld	xmm2,xmm15,26
	vmovdqu	XMMWORD[(80-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm10

	vpsrld	xmm1,xmm15,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm15,21
	vpaddd	xmm5,xmm5,XMMWORD[32+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm15,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm15,7
	vpandn	xmm0,xmm15,xmm9
	vpand	xmm4,xmm15,xmm8

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm10,xmm11,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm11,30
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm4,xmm12,xmm11

	vpxor	xmm10,xmm10,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm11,13

	vpslld	xmm2,xmm11,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm3,xmm3,xmm4

	vpxor	xmm7,xmm10,xmm1

	vpsrld	xmm1,xmm11,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm11,10
	vpxor	xmm10,xmm12,xmm3
	vpaddd	xmm14,xmm14,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm10,xmm10,xmm5
	vpaddd	xmm10,xmm10,xmm7
	vmovd	xmm5,DWORD[24+r8]
	vmovd	xmm0,DWORD[24+r9]
	vpinsrd	xmm5,xmm5,DWORD[24+r10],1
	vpinsrd	xmm0,xmm0,DWORD[24+r11],1
	vpunpckldq	xmm5,xmm5,xmm0
	vpshufb	xmm5,xmm5,xmm6
	vpsrld	xmm7,xmm14,6
	vpslld	xmm2,xmm14,26
	vmovdqu	XMMWORD[(96-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm9

	vpsrld	xmm1,xmm14,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm14,21
	vpaddd	xmm5,xmm5,XMMWORD[64+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm14,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm14,7
	vpandn	xmm0,xmm14,xmm8
	vpand	xmm3,xmm14,xmm15

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm9,xmm10,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm10,30
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm3,xmm11,xmm10

	vpxor	xmm9,xmm9,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm10,13

	vpslld	xmm2,xmm10,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm4,xmm4,xmm3

	vpxor	xmm7,xmm9,xmm1

	vpsrld	xmm1,xmm10,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm10,10
	vpxor	xmm9,xmm11,xmm4
	vpaddd	xmm13,xmm13,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm9,xmm9,xmm5
	vpaddd	xmm9,xmm9,xmm7
	vmovd	xmm5,DWORD[28+r8]
	vmovd	xmm0,DWORD[28+r9]
	vpinsrd	xmm5,xmm5,DWORD[28+r10],1
	vpinsrd	xmm0,xmm0,DWORD[28+r11],1
	vpunpckldq	xmm5,xmm5,xmm0
	vpshufb	xmm5,xmm5,xmm6
	vpsrld	xmm7,xmm13,6
	vpslld	xmm2,xmm13,26
	vmovdqu	XMMWORD[(112-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm8

	vpsrld	xmm1,xmm13,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm13,21
	vpaddd	xmm5,xmm5,XMMWORD[96+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm13,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm13,7
	vpandn	xmm0,xmm13,xmm15
	vpand	xmm4,xmm13,xmm14

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm8,xmm9,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm9,30
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm4,xmm10,xmm9

	vpxor	xmm8,xmm8,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm9,13

	vpslld	xmm2,xmm9,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm3,xmm3,xmm4

	vpxor	xmm7,xmm8,xmm1

	vpsrld	xmm1,xmm9,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm9,10
	vpxor	xmm8,xmm10,xmm3
	vpaddd	xmm12,xmm12,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm8,xmm8,xmm5
	vpaddd	xmm8,xmm8,xmm7
	add	rbp,256
	vmovd	xmm5,DWORD[32+r8]
	vmovd	xmm0,DWORD[32+r9]
	vpinsrd	xmm5,xmm5,DWORD[32+r10],1
	vpinsrd	xmm0,xmm0,DWORD[32+r11],1
	vpunpckldq	xmm5,xmm5,xmm0
	vpshufb	xmm5,xmm5,xmm6
	vpsrld	xmm7,xmm12,6
	vpslld	xmm2,xmm12,26
	vmovdqu	XMMWORD[(128-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm15

	vpsrld	xmm1,xmm12,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm12,21
	vpaddd	xmm5,xmm5,XMMWORD[((-128))+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm12,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm12,7
	vpandn	xmm0,xmm12,xmm14
	vpand	xmm3,xmm12,xmm13

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm15,xmm8,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm8,30
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm3,xmm9,xmm8

	vpxor	xmm15,xmm15,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm8,13

	vpslld	xmm2,xmm8,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm4,xmm4,xmm3

	vpxor	xmm7,xmm15,xmm1

	vpsrld	xmm1,xmm8,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm8,10
	vpxor	xmm15,xmm9,xmm4
	vpaddd	xmm11,xmm11,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm15,xmm15,xmm5
	vpaddd	xmm15,xmm15,xmm7
	vmovd	xmm5,DWORD[36+r8]
	vmovd	xmm0,DWORD[36+r9]
	vpinsrd	xmm5,xmm5,DWORD[36+r10],1
	vpinsrd	xmm0,xmm0,DWORD[36+r11],1
	vpunpckldq	xmm5,xmm5,xmm0
	vpshufb	xmm5,xmm5,xmm6
	vpsrld	xmm7,xmm11,6
	vpslld	xmm2,xmm11,26
	vmovdqu	XMMWORD[(144-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm14

	vpsrld	xmm1,xmm11,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm11,21
	vpaddd	xmm5,xmm5,XMMWORD[((-96))+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm11,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm11,7
	vpandn	xmm0,xmm11,xmm13
	vpand	xmm4,xmm11,xmm12

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm14,xmm15,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm15,30
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm4,xmm8,xmm15

	vpxor	xmm14,xmm14,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm15,13

	vpslld	xmm2,xmm15,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm3,xmm3,xmm4

	vpxor	xmm7,xmm14,xmm1

	vpsrld	xmm1,xmm15,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm15,10
	vpxor	xmm14,xmm8,xmm3
	vpaddd	xmm10,xmm10,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm14,xmm14,xmm5
	vpaddd	xmm14,xmm14,xmm7
	vmovd	xmm5,DWORD[40+r8]
	vmovd	xmm0,DWORD[40+r9]
	vpinsrd	xmm5,xmm5,DWORD[40+r10],1
	vpinsrd	xmm0,xmm0,DWORD[40+r11],1
	vpunpckldq	xmm5,xmm5,xmm0
	vpshufb	xmm5,xmm5,xmm6
	vpsrld	xmm7,xmm10,6
	vpslld	xmm2,xmm10,26
	vmovdqu	XMMWORD[(160-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm13

	vpsrld	xmm1,xmm10,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm10,21
	vpaddd	xmm5,xmm5,XMMWORD[((-64))+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm10,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm10,7
	vpandn	xmm0,xmm10,xmm12
	vpand	xmm3,xmm10,xmm11

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm13,xmm14,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm14,30
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm3,xmm15,xmm14

	vpxor	xmm13,xmm13,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm14,13

	vpslld	xmm2,xmm14,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm4,xmm4,xmm3

	vpxor	xmm7,xmm13,xmm1

	vpsrld	xmm1,xmm14,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm14,10
	vpxor	xmm13,xmm15,xmm4
	vpaddd	xmm9,xmm9,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm13,xmm13,xmm5
	vpaddd	xmm13,xmm13,xmm7
	vmovd	xmm5,DWORD[44+r8]
	vmovd	xmm0,DWORD[44+r9]
	vpinsrd	xmm5,xmm5,DWORD[44+r10],1
	vpinsrd	xmm0,xmm0,DWORD[44+r11],1
	vpunpckldq	xmm5,xmm5,xmm0
	vpshufb	xmm5,xmm5,xmm6
	vpsrld	xmm7,xmm9,6
	vpslld	xmm2,xmm9,26
	vmovdqu	XMMWORD[(176-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm12

	vpsrld	xmm1,xmm9,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm9,21
	vpaddd	xmm5,xmm5,XMMWORD[((-32))+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm9,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm9,7
	vpandn	xmm0,xmm9,xmm11
	vpand	xmm4,xmm9,xmm10

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm12,xmm13,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm13,30
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm4,xmm14,xmm13

	vpxor	xmm12,xmm12,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm13,13

	vpslld	xmm2,xmm13,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm3,xmm3,xmm4

	vpxor	xmm7,xmm12,xmm1

	vpsrld	xmm1,xmm13,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm13,10
	vpxor	xmm12,xmm14,xmm3
	vpaddd	xmm8,xmm8,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm12,xmm12,xmm5
	vpaddd	xmm12,xmm12,xmm7
	vmovd	xmm5,DWORD[48+r8]
	vmovd	xmm0,DWORD[48+r9]
	vpinsrd	xmm5,xmm5,DWORD[48+r10],1
	vpinsrd	xmm0,xmm0,DWORD[48+r11],1
	vpunpckldq	xmm5,xmm5,xmm0
	vpshufb	xmm5,xmm5,xmm6
	vpsrld	xmm7,xmm8,6
	vpslld	xmm2,xmm8,26
	vmovdqu	XMMWORD[(192-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm11

	vpsrld	xmm1,xmm8,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm8,21
	vpaddd	xmm5,xmm5,XMMWORD[rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm8,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm8,7
	vpandn	xmm0,xmm8,xmm10
	vpand	xmm3,xmm8,xmm9

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm11,xmm12,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm12,30
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm3,xmm13,xmm12

	vpxor	xmm11,xmm11,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm12,13

	vpslld	xmm2,xmm12,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm4,xmm4,xmm3

	vpxor	xmm7,xmm11,xmm1

	vpsrld	xmm1,xmm12,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm12,10
	vpxor	xmm11,xmm13,xmm4
	vpaddd	xmm15,xmm15,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm11,xmm11,xmm5
	vpaddd	xmm11,xmm11,xmm7
	vmovd	xmm5,DWORD[52+r8]
	vmovd	xmm0,DWORD[52+r9]
	vpinsrd	xmm5,xmm5,DWORD[52+r10],1
	vpinsrd	xmm0,xmm0,DWORD[52+r11],1
	vpunpckldq	xmm5,xmm5,xmm0
	vpshufb	xmm5,xmm5,xmm6
	vpsrld	xmm7,xmm15,6
	vpslld	xmm2,xmm15,26
	vmovdqu	XMMWORD[(208-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm10

	vpsrld	xmm1,xmm15,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm15,21
	vpaddd	xmm5,xmm5,XMMWORD[32+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm15,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm15,7
	vpandn	xmm0,xmm15,xmm9
	vpand	xmm4,xmm15,xmm8

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm10,xmm11,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm11,30
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm4,xmm12,xmm11

	vpxor	xmm10,xmm10,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm11,13

	vpslld	xmm2,xmm11,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm3,xmm3,xmm4

	vpxor	xmm7,xmm10,xmm1

	vpsrld	xmm1,xmm11,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm11,10
	vpxor	xmm10,xmm12,xmm3
	vpaddd	xmm14,xmm14,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm10,xmm10,xmm5
	vpaddd	xmm10,xmm10,xmm7
	vmovd	xmm5,DWORD[56+r8]
	vmovd	xmm0,DWORD[56+r9]
	vpinsrd	xmm5,xmm5,DWORD[56+r10],1
	vpinsrd	xmm0,xmm0,DWORD[56+r11],1
	vpunpckldq	xmm5,xmm5,xmm0
	vpshufb	xmm5,xmm5,xmm6
	vpsrld	xmm7,xmm14,6
	vpslld	xmm2,xmm14,26
	vmovdqu	XMMWORD[(224-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm9

	vpsrld	xmm1,xmm14,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm14,21
	vpaddd	xmm5,xmm5,XMMWORD[64+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm14,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm14,7
	vpandn	xmm0,xmm14,xmm8
	vpand	xmm3,xmm14,xmm15

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm9,xmm10,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm10,30
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm3,xmm11,xmm10

	vpxor	xmm9,xmm9,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm10,13

	vpslld	xmm2,xmm10,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm4,xmm4,xmm3

	vpxor	xmm7,xmm9,xmm1

	vpsrld	xmm1,xmm10,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm10,10
	vpxor	xmm9,xmm11,xmm4
	vpaddd	xmm13,xmm13,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm9,xmm9,xmm5
	vpaddd	xmm9,xmm9,xmm7
	vmovd	xmm5,DWORD[60+r8]
	lea	r8,[64+r8]
	vmovd	xmm0,DWORD[60+r9]
	lea	r9,[64+r9]
	vpinsrd	xmm5,xmm5,DWORD[60+r10],1
	lea	r10,[64+r10]
	vpinsrd	xmm0,xmm0,DWORD[60+r11],1
	lea	r11,[64+r11]
	vpunpckldq	xmm5,xmm5,xmm0
	vpshufb	xmm5,xmm5,xmm6
	vpsrld	xmm7,xmm13,6
	vpslld	xmm2,xmm13,26
	vmovdqu	XMMWORD[(240-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm8

	vpsrld	xmm1,xmm13,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm13,21
	vpaddd	xmm5,xmm5,XMMWORD[96+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm13,25
	vpxor	xmm7,xmm7,xmm2
	prefetcht0	[63+r8]
	vpslld	xmm2,xmm13,7
	vpandn	xmm0,xmm13,xmm15
	vpand	xmm4,xmm13,xmm14
	prefetcht0	[63+r9]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm8,xmm9,2
	vpxor	xmm7,xmm7,xmm2
	prefetcht0	[63+r10]
	vpslld	xmm1,xmm9,30
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm4,xmm10,xmm9
	prefetcht0	[63+r11]
	vpxor	xmm8,xmm8,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm9,13

	vpslld	xmm2,xmm9,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm3,xmm3,xmm4

	vpxor	xmm7,xmm8,xmm1

	vpsrld	xmm1,xmm9,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm9,10
	vpxor	xmm8,xmm10,xmm3
	vpaddd	xmm12,xmm12,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm8,xmm8,xmm5
	vpaddd	xmm8,xmm8,xmm7
	add	rbp,256
	; Prime xmm5 with schedule slot 0 and set ecx = 3: the dec ecx / jnz at
	; the bottom of $L$oop_16_xx_avx makes the loop body run three times.
	vmovdqu	xmm5,XMMWORD[((0-128))+rax]
	mov	ecx,3
	jmp	NEAR $L$oop_16_xx_avx
ALIGN	32
;-----------------------------------------------------------------------
; $L$oop_16_xx_avx
; One pass = 16 unrolled rounds.  Every round first extends the message
; schedule in place and then performs one compression step.  All vector
; instructions operate on four 32-bit elements per xmm register.
;
; Schedule ring : 16 slots of 16 bytes, slot k at (16*k-128)+rax.
; Round constant: read relative to rbp at -128, -96, ... +96 in successive
;                 rounds; rbp is advanced by 256 after every 8 rounds.
;
; Register roles in this first round (the rounds that follow repeat the
; same instruction pattern with the roles shifted by one register):
;   xmm8 = a   xmm9 = b   xmm10 = c   xmm11 = d
;   xmm12 = e  xmm13 = f  xmm14 = g   xmm15 = h
;   xmm4  = (b ^ c), left behind by the preceding round
;   xmm5  = schedule word being built (slot 0, preloaded)
;   xmm0, xmm1, xmm2, xmm7 = scratch; xmm3 is scratch, then receives (a ^ b)
;-----------------------------------------------------------------------
$L$oop_16_xx_avx:
	; --- message schedule:
	;     W = slot0 + sigma0(slot1) + slot9 + sigma1(slot14)
	vmovdqu	xmm6,XMMWORD[((16-128))+rax]	; xmm6 = slot 1 (also next round's word)
	vpaddd	xmm5,xmm5,XMMWORD[((144-128))+rax]	; xmm5 = slot 0 + slot 9

	; sigma0(x) = ror(x,7) ^ ror(x,18) ^ (x >> 3), x = xmm6.
	; Each rotate is built from a right shift and a left shift by 32-n.
	vpsrld	xmm7,xmm6,3
	vpsrld	xmm1,xmm6,7
	vpslld	xmm2,xmm6,25
	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm6,18
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm6,14
	; sigma1(y) = ror(y,17) ^ ror(y,19) ^ (y >> 10), y = slot 14,
	; interleaved with the tail of sigma0.
	vmovdqu	xmm0,XMMWORD[((224-128))+rax]
	vpsrld	xmm3,xmm0,10

	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm0,17
	vpxor	xmm7,xmm7,xmm2	; xmm7 = sigma0(slot 1)
	vpslld	xmm2,xmm0,15
	vpaddd	xmm5,xmm5,xmm7	; W += sigma0
	vpxor	xmm7,xmm3,xmm1
	vpsrld	xmm1,xmm0,19
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,13
	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2	; xmm7 = sigma1(slot 14)
	vpaddd	xmm5,xmm5,xmm7	; W complete
	; --- compression step:
	;     T1 = h + Sigma1(e) + Ch(e,f,g) + K + W
	;     Sigma1(e) = ror(e,6) ^ ror(e,11) ^ ror(e,25)
	vpsrld	xmm7,xmm12,6
	vpslld	xmm2,xmm12,26
	vmovdqu	XMMWORD[(0-128)+rax],xmm5	; write W back into slot 0
	vpaddd	xmm5,xmm5,xmm15	; T1 = W + h

	vpsrld	xmm1,xmm12,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm12,21
	vpaddd	xmm5,xmm5,XMMWORD[((-128))+rbp]	; T1 += round constant
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm12,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm12,7
	; Ch(e,f,g) = (~e & g) ^ (e & f)
	vpandn	xmm0,xmm12,xmm14
	vpand	xmm3,xmm12,xmm13

	vpxor	xmm7,xmm7,xmm1

	; Sigma0(a) = ror(a,2) ^ ror(a,13) ^ ror(a,22).  xmm15 (h, already
	; consumed above) is reused as a temporary and ends up holding new a.
	vpsrld	xmm15,xmm8,2
	vpxor	xmm7,xmm7,xmm2	; xmm7 = Sigma1(e)

	vpslld	xmm1,xmm8,30
	vpxor	xmm0,xmm0,xmm3	; xmm0 = Ch(e,f,g)
	vpxor	xmm3,xmm9,xmm8	; xmm3 = a ^ b (next round's b ^ c)

	vpxor	xmm15,xmm15,xmm1
	vpaddd	xmm5,xmm5,xmm7	; T1 += Sigma1(e)

	vpsrld	xmm1,xmm8,13

	vpslld	xmm2,xmm8,19
	vpaddd	xmm5,xmm5,xmm0	; T1 += Ch(e,f,g)
	vpand	xmm4,xmm4,xmm3	; (b ^ c) & (a ^ b)

	vpxor	xmm7,xmm15,xmm1

	vpsrld	xmm1,xmm8,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm8,10
	vpxor	xmm15,xmm9,xmm4	; Maj(a,b,c) = b ^ ((b ^ c) & (a ^ b))
	vpaddd	xmm11,xmm11,xmm5	; d += T1 (becomes next round's e)

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2	; xmm7 = Sigma0(a)

	; new a = T1 + Maj(a,b,c) + Sigma0(a), kept in xmm15
	vpaddd	xmm15,xmm15,xmm5
	vpaddd	xmm15,xmm15,xmm7
	vmovdqu	xmm5,XMMWORD[((32-128))+rax]
	vpaddd	xmm6,xmm6,XMMWORD[((160-128))+rax]

	vpsrld	xmm7,xmm5,3
	vpsrld	xmm1,xmm5,7
	vpslld	xmm2,xmm5,25
	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm5,18
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm5,14
	vmovdqu	xmm0,XMMWORD[((240-128))+rax]
	vpsrld	xmm4,xmm0,10

	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm0,17
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,15
	vpaddd	xmm6,xmm6,xmm7
	vpxor	xmm7,xmm4,xmm1
	vpsrld	xmm1,xmm0,19
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,13
	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2
	vpaddd	xmm6,xmm6,xmm7
	vpsrld	xmm7,xmm11,6
	vpslld	xmm2,xmm11,26
	vmovdqu	XMMWORD[(16-128)+rax],xmm6
	vpaddd	xmm6,xmm6,xmm14

	vpsrld	xmm1,xmm11,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm11,21
	vpaddd	xmm6,xmm6,XMMWORD[((-96))+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm11,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm11,7
	vpandn	xmm0,xmm11,xmm13
	vpand	xmm4,xmm11,xmm12

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm14,xmm15,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm15,30
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm4,xmm8,xmm15

	vpxor	xmm14,xmm14,xmm1
	vpaddd	xmm6,xmm6,xmm7

	vpsrld	xmm1,xmm15,13

	vpslld	xmm2,xmm15,19
	vpaddd	xmm6,xmm6,xmm0
	vpand	xmm3,xmm3,xmm4

	vpxor	xmm7,xmm14,xmm1

	vpsrld	xmm1,xmm15,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm15,10
	vpxor	xmm14,xmm8,xmm3
	vpaddd	xmm10,xmm10,xmm6

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm14,xmm14,xmm6
	vpaddd	xmm14,xmm14,xmm7
	vmovdqu	xmm6,XMMWORD[((48-128))+rax]
	vpaddd	xmm5,xmm5,XMMWORD[((176-128))+rax]

	vpsrld	xmm7,xmm6,3
	vpsrld	xmm1,xmm6,7
	vpslld	xmm2,xmm6,25
	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm6,18
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm6,14
	vmovdqu	xmm0,XMMWORD[((0-128))+rax]
	vpsrld	xmm3,xmm0,10

	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm0,17
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,15
	vpaddd	xmm5,xmm5,xmm7
	vpxor	xmm7,xmm3,xmm1
	vpsrld	xmm1,xmm0,19
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,13
	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2
	vpaddd	xmm5,xmm5,xmm7
	vpsrld	xmm7,xmm10,6
	vpslld	xmm2,xmm10,26
	vmovdqu	XMMWORD[(32-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm13

	vpsrld	xmm1,xmm10,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm10,21
	vpaddd	xmm5,xmm5,XMMWORD[((-64))+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm10,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm10,7
	vpandn	xmm0,xmm10,xmm12
	vpand	xmm3,xmm10,xmm11

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm13,xmm14,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm14,30
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm3,xmm15,xmm14

	vpxor	xmm13,xmm13,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm14,13

	vpslld	xmm2,xmm14,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm4,xmm4,xmm3

	vpxor	xmm7,xmm13,xmm1

	vpsrld	xmm1,xmm14,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm14,10
	vpxor	xmm13,xmm15,xmm4
	vpaddd	xmm9,xmm9,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm13,xmm13,xmm5
	vpaddd	xmm13,xmm13,xmm7
	vmovdqu	xmm5,XMMWORD[((64-128))+rax]
	vpaddd	xmm6,xmm6,XMMWORD[((192-128))+rax]

	vpsrld	xmm7,xmm5,3
	vpsrld	xmm1,xmm5,7
	vpslld	xmm2,xmm5,25
	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm5,18
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm5,14
	vmovdqu	xmm0,XMMWORD[((16-128))+rax]
	vpsrld	xmm4,xmm0,10

	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm0,17
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,15
	vpaddd	xmm6,xmm6,xmm7
	vpxor	xmm7,xmm4,xmm1
	vpsrld	xmm1,xmm0,19
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,13
	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2
	vpaddd	xmm6,xmm6,xmm7
	vpsrld	xmm7,xmm9,6
	vpslld	xmm2,xmm9,26
	vmovdqu	XMMWORD[(48-128)+rax],xmm6
	vpaddd	xmm6,xmm6,xmm12

	vpsrld	xmm1,xmm9,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm9,21
	vpaddd	xmm6,xmm6,XMMWORD[((-32))+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm9,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm9,7
	vpandn	xmm0,xmm9,xmm11
	vpand	xmm4,xmm9,xmm10

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm12,xmm13,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm13,30
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm4,xmm14,xmm13

	vpxor	xmm12,xmm12,xmm1
	vpaddd	xmm6,xmm6,xmm7

	vpsrld	xmm1,xmm13,13

	vpslld	xmm2,xmm13,19
	vpaddd	xmm6,xmm6,xmm0
	vpand	xmm3,xmm3,xmm4

	vpxor	xmm7,xmm12,xmm1

	vpsrld	xmm1,xmm13,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm13,10
	vpxor	xmm12,xmm14,xmm3
	vpaddd	xmm8,xmm8,xmm6

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm12,xmm12,xmm6
	vpaddd	xmm12,xmm12,xmm7
	vmovdqu	xmm6,XMMWORD[((80-128))+rax]
	vpaddd	xmm5,xmm5,XMMWORD[((208-128))+rax]

	vpsrld	xmm7,xmm6,3
	vpsrld	xmm1,xmm6,7
	vpslld	xmm2,xmm6,25
	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm6,18
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm6,14
	vmovdqu	xmm0,XMMWORD[((32-128))+rax]
	vpsrld	xmm3,xmm0,10

	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm0,17
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,15
	vpaddd	xmm5,xmm5,xmm7
	vpxor	xmm7,xmm3,xmm1
	vpsrld	xmm1,xmm0,19
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,13
	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2
	vpaddd	xmm5,xmm5,xmm7
	vpsrld	xmm7,xmm8,6
	vpslld	xmm2,xmm8,26
	vmovdqu	XMMWORD[(64-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm11

	vpsrld	xmm1,xmm8,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm8,21
	vpaddd	xmm5,xmm5,XMMWORD[rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm8,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm8,7
	vpandn	xmm0,xmm8,xmm10
	vpand	xmm3,xmm8,xmm9

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm11,xmm12,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm12,30
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm3,xmm13,xmm12

	vpxor	xmm11,xmm11,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm12,13

	vpslld	xmm2,xmm12,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm4,xmm4,xmm3

	vpxor	xmm7,xmm11,xmm1

	vpsrld	xmm1,xmm12,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm12,10
	vpxor	xmm11,xmm13,xmm4
	vpaddd	xmm15,xmm15,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm11,xmm11,xmm5
	vpaddd	xmm11,xmm11,xmm7
	vmovdqu	xmm5,XMMWORD[((96-128))+rax]
	vpaddd	xmm6,xmm6,XMMWORD[((224-128))+rax]

	vpsrld	xmm7,xmm5,3
	vpsrld	xmm1,xmm5,7
	vpslld	xmm2,xmm5,25
	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm5,18
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm5,14
	vmovdqu	xmm0,XMMWORD[((48-128))+rax]
	vpsrld	xmm4,xmm0,10

	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm0,17
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,15
	vpaddd	xmm6,xmm6,xmm7
	vpxor	xmm7,xmm4,xmm1
	vpsrld	xmm1,xmm0,19
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,13
	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2
	vpaddd	xmm6,xmm6,xmm7
	vpsrld	xmm7,xmm15,6
	vpslld	xmm2,xmm15,26
	vmovdqu	XMMWORD[(80-128)+rax],xmm6
	vpaddd	xmm6,xmm6,xmm10

	vpsrld	xmm1,xmm15,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm15,21
	vpaddd	xmm6,xmm6,XMMWORD[32+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm15,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm15,7
	vpandn	xmm0,xmm15,xmm9
	vpand	xmm4,xmm15,xmm8

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm10,xmm11,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm11,30
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm4,xmm12,xmm11

	vpxor	xmm10,xmm10,xmm1
	vpaddd	xmm6,xmm6,xmm7

	vpsrld	xmm1,xmm11,13

	vpslld	xmm2,xmm11,19
	vpaddd	xmm6,xmm6,xmm0
	vpand	xmm3,xmm3,xmm4

	vpxor	xmm7,xmm10,xmm1

	vpsrld	xmm1,xmm11,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm11,10
	vpxor	xmm10,xmm12,xmm3
	vpaddd	xmm14,xmm14,xmm6

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm10,xmm10,xmm6
	vpaddd	xmm10,xmm10,xmm7
	vmovdqu	xmm6,XMMWORD[((112-128))+rax]
	vpaddd	xmm5,xmm5,XMMWORD[((240-128))+rax]

	vpsrld	xmm7,xmm6,3
	vpsrld	xmm1,xmm6,7
	vpslld	xmm2,xmm6,25
	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm6,18
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm6,14
	vmovdqu	xmm0,XMMWORD[((64-128))+rax]
	vpsrld	xmm3,xmm0,10

	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm0,17
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,15
	vpaddd	xmm5,xmm5,xmm7
	vpxor	xmm7,xmm3,xmm1
	vpsrld	xmm1,xmm0,19
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,13
	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2
	vpaddd	xmm5,xmm5,xmm7
	vpsrld	xmm7,xmm14,6
	vpslld	xmm2,xmm14,26
	vmovdqu	XMMWORD[(96-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm9

	vpsrld	xmm1,xmm14,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm14,21
	vpaddd	xmm5,xmm5,XMMWORD[64+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm14,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm14,7
	vpandn	xmm0,xmm14,xmm8
	vpand	xmm3,xmm14,xmm15

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm9,xmm10,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm10,30
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm3,xmm11,xmm10

	vpxor	xmm9,xmm9,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm10,13

	vpslld	xmm2,xmm10,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm4,xmm4,xmm3

	vpxor	xmm7,xmm9,xmm1

	vpsrld	xmm1,xmm10,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm10,10
	vpxor	xmm9,xmm11,xmm4
	vpaddd	xmm13,xmm13,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm9,xmm9,xmm5
	vpaddd	xmm9,xmm9,xmm7
	vmovdqu	xmm5,XMMWORD[((128-128))+rax]
	vpaddd	xmm6,xmm6,XMMWORD[((0-128))+rax]

	vpsrld	xmm7,xmm5,3
	vpsrld	xmm1,xmm5,7
	vpslld	xmm2,xmm5,25
	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm5,18
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm5,14
	vmovdqu	xmm0,XMMWORD[((80-128))+rax]
	vpsrld	xmm4,xmm0,10

	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm0,17
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,15
	vpaddd	xmm6,xmm6,xmm7
	vpxor	xmm7,xmm4,xmm1
	vpsrld	xmm1,xmm0,19
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,13
	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2
	vpaddd	xmm6,xmm6,xmm7
	vpsrld	xmm7,xmm13,6
	vpslld	xmm2,xmm13,26
	vmovdqu	XMMWORD[(112-128)+rax],xmm6
	vpaddd	xmm6,xmm6,xmm8

	vpsrld	xmm1,xmm13,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm13,21
	vpaddd	xmm6,xmm6,XMMWORD[96+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm13,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm13,7
	vpandn	xmm0,xmm13,xmm15
	vpand	xmm4,xmm13,xmm14

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm8,xmm9,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm9,30
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm4,xmm10,xmm9

	vpxor	xmm8,xmm8,xmm1
	vpaddd	xmm6,xmm6,xmm7

	vpsrld	xmm1,xmm9,13

	vpslld	xmm2,xmm9,19
	vpaddd	xmm6,xmm6,xmm0
	vpand	xmm3,xmm3,xmm4

	vpxor	xmm7,xmm8,xmm1

	vpsrld	xmm1,xmm9,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm9,10
	vpxor	xmm8,xmm10,xmm3
	vpaddd	xmm12,xmm12,xmm6

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm8,xmm8,xmm6
	vpaddd	xmm8,xmm8,xmm7
	add	rbp,256
	vmovdqu	xmm6,XMMWORD[((144-128))+rax]
	vpaddd	xmm5,xmm5,XMMWORD[((16-128))+rax]

	vpsrld	xmm7,xmm6,3
	vpsrld	xmm1,xmm6,7
	vpslld	xmm2,xmm6,25
	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm6,18
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm6,14
	vmovdqu	xmm0,XMMWORD[((96-128))+rax]
	vpsrld	xmm3,xmm0,10

	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm0,17
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,15
	vpaddd	xmm5,xmm5,xmm7
	vpxor	xmm7,xmm3,xmm1
	vpsrld	xmm1,xmm0,19
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,13
	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2
	vpaddd	xmm5,xmm5,xmm7
	vpsrld	xmm7,xmm12,6
	vpslld	xmm2,xmm12,26
	vmovdqu	XMMWORD[(128-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm15

	vpsrld	xmm1,xmm12,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm12,21
	vpaddd	xmm5,xmm5,XMMWORD[((-128))+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm12,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm12,7
	vpandn	xmm0,xmm12,xmm14
	vpand	xmm3,xmm12,xmm13

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm15,xmm8,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm8,30
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm3,xmm9,xmm8

	vpxor	xmm15,xmm15,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm8,13

	vpslld	xmm2,xmm8,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm4,xmm4,xmm3

	vpxor	xmm7,xmm15,xmm1

	vpsrld	xmm1,xmm8,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm8,10
	vpxor	xmm15,xmm9,xmm4
	vpaddd	xmm11,xmm11,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm15,xmm15,xmm5
	vpaddd	xmm15,xmm15,xmm7
	vmovdqu	xmm5,XMMWORD[((160-128))+rax]
	vpaddd	xmm6,xmm6,XMMWORD[((32-128))+rax]

	vpsrld	xmm7,xmm5,3
	vpsrld	xmm1,xmm5,7
	vpslld	xmm2,xmm5,25
	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm5,18
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm5,14
	vmovdqu	xmm0,XMMWORD[((112-128))+rax]
	vpsrld	xmm4,xmm0,10

	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm0,17
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,15
	vpaddd	xmm6,xmm6,xmm7
	vpxor	xmm7,xmm4,xmm1
	vpsrld	xmm1,xmm0,19
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,13
	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2
	vpaddd	xmm6,xmm6,xmm7
	vpsrld	xmm7,xmm11,6
	vpslld	xmm2,xmm11,26
	vmovdqu	XMMWORD[(144-128)+rax],xmm6
	vpaddd	xmm6,xmm6,xmm14

	vpsrld	xmm1,xmm11,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm11,21
	vpaddd	xmm6,xmm6,XMMWORD[((-96))+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm11,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm11,7
	vpandn	xmm0,xmm11,xmm13
	vpand	xmm4,xmm11,xmm12

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm14,xmm15,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm15,30
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm4,xmm8,xmm15

	vpxor	xmm14,xmm14,xmm1
	vpaddd	xmm6,xmm6,xmm7

	vpsrld	xmm1,xmm15,13

	vpslld	xmm2,xmm15,19
	vpaddd	xmm6,xmm6,xmm0
	vpand	xmm3,xmm3,xmm4

	vpxor	xmm7,xmm14,xmm1

	vpsrld	xmm1,xmm15,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm15,10
	vpxor	xmm14,xmm8,xmm3
	vpaddd	xmm10,xmm10,xmm6

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm14,xmm14,xmm6
	vpaddd	xmm14,xmm14,xmm7
	vmovdqu	xmm6,XMMWORD[((176-128))+rax]
	vpaddd	xmm5,xmm5,XMMWORD[((48-128))+rax]

	vpsrld	xmm7,xmm6,3
	vpsrld	xmm1,xmm6,7
	vpslld	xmm2,xmm6,25
	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm6,18
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm6,14
	vmovdqu	xmm0,XMMWORD[((128-128))+rax]
	vpsrld	xmm3,xmm0,10

	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm0,17
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,15
	vpaddd	xmm5,xmm5,xmm7
	vpxor	xmm7,xmm3,xmm1
	vpsrld	xmm1,xmm0,19
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,13
	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2
	vpaddd	xmm5,xmm5,xmm7
	vpsrld	xmm7,xmm10,6
	vpslld	xmm2,xmm10,26
	vmovdqu	XMMWORD[(160-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm13

	vpsrld	xmm1,xmm10,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm10,21
	vpaddd	xmm5,xmm5,XMMWORD[((-64))+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm10,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm10,7
	vpandn	xmm0,xmm10,xmm12
	vpand	xmm3,xmm10,xmm11

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm13,xmm14,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm14,30
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm3,xmm15,xmm14

	vpxor	xmm13,xmm13,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm14,13

	vpslld	xmm2,xmm14,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm4,xmm4,xmm3

	vpxor	xmm7,xmm13,xmm1

	vpsrld	xmm1,xmm14,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm14,10
	vpxor	xmm13,xmm15,xmm4
	vpaddd	xmm9,xmm9,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm13,xmm13,xmm5
	vpaddd	xmm13,xmm13,xmm7
	vmovdqu	xmm5,XMMWORD[((192-128))+rax]
	vpaddd	xmm6,xmm6,XMMWORD[((64-128))+rax]

	vpsrld	xmm7,xmm5,3
	vpsrld	xmm1,xmm5,7
	vpslld	xmm2,xmm5,25
	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm5,18
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm5,14
	vmovdqu	xmm0,XMMWORD[((144-128))+rax]
	vpsrld	xmm4,xmm0,10

	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm0,17
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,15
	vpaddd	xmm6,xmm6,xmm7
	vpxor	xmm7,xmm4,xmm1
	vpsrld	xmm1,xmm0,19
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,13
	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2
	vpaddd	xmm6,xmm6,xmm7
	vpsrld	xmm7,xmm9,6
	vpslld	xmm2,xmm9,26
	vmovdqu	XMMWORD[(176-128)+rax],xmm6
	vpaddd	xmm6,xmm6,xmm12

	vpsrld	xmm1,xmm9,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm9,21
	vpaddd	xmm6,xmm6,XMMWORD[((-32))+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm9,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm9,7
	vpandn	xmm0,xmm9,xmm11
	vpand	xmm4,xmm9,xmm10

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm12,xmm13,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm13,30
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm4,xmm14,xmm13

	vpxor	xmm12,xmm12,xmm1
	vpaddd	xmm6,xmm6,xmm7

	vpsrld	xmm1,xmm13,13

	vpslld	xmm2,xmm13,19
	vpaddd	xmm6,xmm6,xmm0
	vpand	xmm3,xmm3,xmm4

	vpxor	xmm7,xmm12,xmm1

	vpsrld	xmm1,xmm13,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm13,10
	vpxor	xmm12,xmm14,xmm3
	vpaddd	xmm8,xmm8,xmm6

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm12,xmm12,xmm6
	vpaddd	xmm12,xmm12,xmm7
	vmovdqu	xmm6,XMMWORD[((208-128))+rax]
	vpaddd	xmm5,xmm5,XMMWORD[((80-128))+rax]

	vpsrld	xmm7,xmm6,3
	vpsrld	xmm1,xmm6,7
	vpslld	xmm2,xmm6,25
	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm6,18
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm6,14
	vmovdqu	xmm0,XMMWORD[((160-128))+rax]
	vpsrld	xmm3,xmm0,10

	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm0,17
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,15
	vpaddd	xmm5,xmm5,xmm7
	vpxor	xmm7,xmm3,xmm1
	vpsrld	xmm1,xmm0,19
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,13
	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2
	vpaddd	xmm5,xmm5,xmm7
	vpsrld	xmm7,xmm8,6
	vpslld	xmm2,xmm8,26
	vmovdqu	XMMWORD[(192-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm11

	vpsrld	xmm1,xmm8,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm8,21
	vpaddd	xmm5,xmm5,XMMWORD[rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm8,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm8,7
	vpandn	xmm0,xmm8,xmm10
	vpand	xmm3,xmm8,xmm9

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm11,xmm12,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm12,30
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm3,xmm13,xmm12

	vpxor	xmm11,xmm11,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm12,13

	vpslld	xmm2,xmm12,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm4,xmm4,xmm3

	vpxor	xmm7,xmm11,xmm1

	vpsrld	xmm1,xmm12,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm12,10
	vpxor	xmm11,xmm13,xmm4
	vpaddd	xmm15,xmm15,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm11,xmm11,xmm5
	vpaddd	xmm11,xmm11,xmm7
	vmovdqu	xmm5,XMMWORD[((224-128))+rax]
	vpaddd	xmm6,xmm6,XMMWORD[((96-128))+rax]

	vpsrld	xmm7,xmm5,3
	vpsrld	xmm1,xmm5,7
	vpslld	xmm2,xmm5,25
	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm5,18
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm5,14
	vmovdqu	xmm0,XMMWORD[((176-128))+rax]
	vpsrld	xmm4,xmm0,10

	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm0,17
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,15
	vpaddd	xmm6,xmm6,xmm7
	vpxor	xmm7,xmm4,xmm1
	vpsrld	xmm1,xmm0,19
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,13
	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2
	vpaddd	xmm6,xmm6,xmm7
	vpsrld	xmm7,xmm15,6
	vpslld	xmm2,xmm15,26
	vmovdqu	XMMWORD[(208-128)+rax],xmm6
	vpaddd	xmm6,xmm6,xmm10

	vpsrld	xmm1,xmm15,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm15,21
	vpaddd	xmm6,xmm6,XMMWORD[32+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm15,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm15,7
	vpandn	xmm0,xmm15,xmm9
	vpand	xmm4,xmm15,xmm8

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm10,xmm11,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm11,30
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm4,xmm12,xmm11

	vpxor	xmm10,xmm10,xmm1
	vpaddd	xmm6,xmm6,xmm7

	vpsrld	xmm1,xmm11,13

	vpslld	xmm2,xmm11,19
	vpaddd	xmm6,xmm6,xmm0
	vpand	xmm3,xmm3,xmm4

	vpxor	xmm7,xmm10,xmm1

	vpsrld	xmm1,xmm11,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm11,10
	vpxor	xmm10,xmm12,xmm3
	vpaddd	xmm14,xmm14,xmm6

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm10,xmm10,xmm6
	vpaddd	xmm10,xmm10,xmm7
	vmovdqu	xmm6,XMMWORD[((240-128))+rax]
	vpaddd	xmm5,xmm5,XMMWORD[((112-128))+rax]

	vpsrld	xmm7,xmm6,3
	vpsrld	xmm1,xmm6,7
	vpslld	xmm2,xmm6,25
	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm6,18
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm6,14
	vmovdqu	xmm0,XMMWORD[((192-128))+rax]
	vpsrld	xmm3,xmm0,10

	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm0,17
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,15
	vpaddd	xmm5,xmm5,xmm7
	vpxor	xmm7,xmm3,xmm1
	vpsrld	xmm1,xmm0,19
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,13
	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2
	vpaddd	xmm5,xmm5,xmm7
	vpsrld	xmm7,xmm14,6
	vpslld	xmm2,xmm14,26
	vmovdqu	XMMWORD[(224-128)+rax],xmm5
	vpaddd	xmm5,xmm5,xmm9

	vpsrld	xmm1,xmm14,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm14,21
	vpaddd	xmm5,xmm5,XMMWORD[64+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm14,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm14,7
	vpandn	xmm0,xmm14,xmm8
	vpand	xmm3,xmm14,xmm15

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm9,xmm10,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm10,30
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm3,xmm11,xmm10

	vpxor	xmm9,xmm9,xmm1
	vpaddd	xmm5,xmm5,xmm7

	vpsrld	xmm1,xmm10,13

	vpslld	xmm2,xmm10,19
	vpaddd	xmm5,xmm5,xmm0
	vpand	xmm4,xmm4,xmm3

	vpxor	xmm7,xmm9,xmm1

	vpsrld	xmm1,xmm10,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm10,10
	vpxor	xmm9,xmm11,xmm4
	vpaddd	xmm13,xmm13,xmm5

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm9,xmm9,xmm5
	vpaddd	xmm9,xmm9,xmm7
	vmovdqu	xmm5,XMMWORD[((0-128))+rax]
	vpaddd	xmm6,xmm6,XMMWORD[((128-128))+rax]

	vpsrld	xmm7,xmm5,3
	vpsrld	xmm1,xmm5,7
	vpslld	xmm2,xmm5,25
	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm5,18
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm5,14
	vmovdqu	xmm0,XMMWORD[((208-128))+rax]
	vpsrld	xmm4,xmm0,10

	vpxor	xmm7,xmm7,xmm1
	vpsrld	xmm1,xmm0,17
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,15
	vpaddd	xmm6,xmm6,xmm7
	vpxor	xmm7,xmm4,xmm1
	vpsrld	xmm1,xmm0,19
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm0,13
	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2
	vpaddd	xmm6,xmm6,xmm7
	vpsrld	xmm7,xmm13,6
	vpslld	xmm2,xmm13,26
	vmovdqu	XMMWORD[(240-128)+rax],xmm6
	vpaddd	xmm6,xmm6,xmm8

	vpsrld	xmm1,xmm13,11
	vpxor	xmm7,xmm7,xmm2
	vpslld	xmm2,xmm13,21
	vpaddd	xmm6,xmm6,XMMWORD[96+rbp]
	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm1,xmm13,25
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm13,7
	vpandn	xmm0,xmm13,xmm15
	vpand	xmm4,xmm13,xmm14

	vpxor	xmm7,xmm7,xmm1

	vpsrld	xmm8,xmm9,2
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm1,xmm9,30
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm4,xmm10,xmm9

	vpxor	xmm8,xmm8,xmm1
	vpaddd	xmm6,xmm6,xmm7

	vpsrld	xmm1,xmm9,13

	vpslld	xmm2,xmm9,19
	vpaddd	xmm6,xmm6,xmm0
	vpand	xmm3,xmm3,xmm4

	vpxor	xmm7,xmm8,xmm1

	vpsrld	xmm1,xmm9,22
	vpxor	xmm7,xmm7,xmm2

	vpslld	xmm2,xmm9,10
	vpxor	xmm8,xmm10,xmm3
	vpaddd	xmm12,xmm12,xmm6

	vpxor	xmm7,xmm7,xmm1
	vpxor	xmm7,xmm7,xmm2

	vpaddd	xmm8,xmm8,xmm6
	vpaddd	xmm8,xmm8,xmm7
	add	rbp,256
	dec	ecx
	jnz	NEAR $L$oop_16_xx_avx

	; --- per-block bookkeeping once the unrolled rounds are done ---
	; rbx addresses four dword per-lane counters.  For every lane whose
	; counter is <= 1 (signed compare), replace its input pointer with
	; K256+128.
	; NOTE(review): presumably so that later loads for an exhausted lane
	; still read from a valid address -- confirm against the loop head.
	mov	ecx,1
	lea	rbp,[((K256+128))]	; also resets the round-constant pointer
	cmp	ecx,DWORD[rbx]
	cmovge	r8,rbp
	cmp	ecx,DWORD[4+rbx]
	cmovge	r9,rbp
	cmp	ecx,DWORD[8+rbx]
	cmovge	r10,rbp
	cmp	ecx,DWORD[12+rbx]
	cmovge	r11,rbp
	; xmm6 = per-lane mask: all ones where counter > 0, else zero.
	; Adding the mask (-1 in active lanes) decrements only active counters.
	vmovdqa	xmm7,XMMWORD[rbx]
	vpxor	xmm0,xmm0,xmm0
	vmovdqa	xmm6,xmm7
	vpcmpgtd	xmm6,xmm6,xmm0
	vpaddd	xmm7,xmm7,xmm6

	; Fold the working variables xmm8..xmm15 into the stored state.
	; State words are read and written 32 bytes apart at (32*k-128)+rdi,
	; 16 bytes each.  Working values are ANDed with the mask first, so a
	; lane whose counter is <= 0 adds zero and its stored state is unchanged.
	vmovdqu	xmm0,XMMWORD[((0-128))+rdi]
	vpand	xmm8,xmm8,xmm6
	vmovdqu	xmm1,XMMWORD[((32-128))+rdi]
	vpand	xmm9,xmm9,xmm6
	vmovdqu	xmm2,XMMWORD[((64-128))+rdi]
	vpand	xmm10,xmm10,xmm6
	vmovdqu	xmm5,XMMWORD[((96-128))+rdi]
	vpand	xmm11,xmm11,xmm6
	vpaddd	xmm8,xmm8,xmm0
	vmovdqu	xmm0,XMMWORD[((128-128))+rdi]
	vpand	xmm12,xmm12,xmm6
	vpaddd	xmm9,xmm9,xmm1
	vmovdqu	xmm1,XMMWORD[((160-128))+rdi]
	vpand	xmm13,xmm13,xmm6
	vpaddd	xmm10,xmm10,xmm2
	vmovdqu	xmm2,XMMWORD[((192-128))+rdi]
	vpand	xmm14,xmm14,xmm6
	vpaddd	xmm11,xmm11,xmm5
	vmovdqu	xmm5,XMMWORD[((224-128))+rdi]
	vpand	xmm15,xmm15,xmm6
	vpaddd	xmm12,xmm12,xmm0
	vpaddd	xmm13,xmm13,xmm1
	vmovdqu	XMMWORD[(0-128)+rdi],xmm8
	vpaddd	xmm14,xmm14,xmm2
	vmovdqu	XMMWORD[(32-128)+rdi],xmm9
	vpaddd	xmm15,xmm15,xmm5
	vmovdqu	XMMWORD[(64-128)+rdi],xmm10
	vmovdqu	XMMWORD[(96-128)+rdi],xmm11
	vmovdqu	XMMWORD[(128-128)+rdi],xmm12
	vmovdqu	XMMWORD[(160-128)+rdi],xmm13
	vmovdqu	XMMWORD[(192-128)+rdi],xmm14
	vmovdqu	XMMWORD[(224-128)+rdi],xmm15

	; Store the decremented counters.  xmm6 was reused as scratch and as
	; the lane mask above, so reload the vpshufb control from $L$pbswap
	; that the message-word loads use.
	vmovdqu	XMMWORD[rbx],xmm7
	vmovdqu	xmm6,XMMWORD[$L$pbswap]
	; Inner loop: another pass of $L$oop_avx while edx != 0.
	dec	edx
	jnz	NEAR $L$oop_avx

	; Outer loop: reload the counter kept at 280+rsp.
	; NOTE(review): presumably saved at the top of $L$oop_grande_avx -- confirm.
	; rdi advances by 16 bytes, i.e. to the other 16-byte half of each
	; 32-byte state row; rsi advances by 64 bytes (presumably four 16-byte
	; input descriptors -- confirm against the loop head).
	mov	edx,DWORD[280+rsp]
	lea	rdi,[16+rdi]
	lea	rsi,[64+rsi]
	dec	edx
	jnz	NEAR $L$oop_grande_avx

; Function exit for the AVX path: restore the callee-saved registers and
; the stack pointer, then return.
$L$done_avx:
	; rax = frame anchor stored at 272+rsp; every restore below is
	; addressed relative to it.
	; NOTE(review): presumably the entry rsp saved by the prologue -- confirm.
	mov	rax,QWORD[272+rsp]

	; Clear the upper halves of the ymm registers before the legacy-SSE
	; movaps instructions below and before returning to the caller.
	vzeroupper
	; xmm6-xmm15 are callee-saved in the Microsoft x64 ABI; reload them
	; from the 160-byte save area at rax-184 .. rax-25.
	movaps	xmm6,XMMWORD[((-184))+rax]
	movaps	xmm7,XMMWORD[((-168))+rax]
	movaps	xmm8,XMMWORD[((-152))+rax]
	movaps	xmm9,XMMWORD[((-136))+rax]
	movaps	xmm10,XMMWORD[((-120))+rax]
	movaps	xmm11,XMMWORD[((-104))+rax]
	movaps	xmm12,XMMWORD[((-88))+rax]
	movaps	xmm13,XMMWORD[((-72))+rax]
	movaps	xmm14,XMMWORD[((-56))+rax]
	movaps	xmm15,XMMWORD[((-40))+rax]
	; Reload rbp and rbx from the two qword slots just below the anchor.
	mov	rbp,QWORD[((-16))+rax]

	mov	rbx,QWORD[((-8))+rax]

	; Drop the whole frame in one step: rsp = anchor.
	lea	rsp,[rax]

$L$epilogue_avx:
	; rdi and rsi are callee-saved on Win64; reload them from 8+rsp and
	; 16+rsp (the slots directly above the return address).
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	; Bytes F3 C3 encode "rep ret".
	DB	0F3h,0C3h		;repret

; End-of-function marker.
; NOTE(review): the name suggests it is referenced by SEH unwind data
; elsewhere in the file -- confirm.
$L$SEH_end_sha256_multi_block_avx:

ALIGN	32
;-----------------------------------------------------------------------
; sha256_multi_block_avx2 -- AVX2 entry point and prologue.
; ABI:  Microsoft x64. The three incoming arguments (rcx, rdx, r8) are
;       moved into rdi, rsi, rdx.
; NOTE(review): presumably rdi = hash context, rsi = array of input
;       descriptors, edx = count; the C prototype is not visible here --
;       confirm.
; Saves: rdi, rsi (caller's home space), rbx, rbp, r12-r15, xmm6-xmm15.
; Frame after "and rsp,-256" (offsets from the aligned rsp):
;       [0..511]    sixteen 32-byte slots, addressed through
;                   rax = rsp+128 and rbx = rsp+384 by the round code
;       [512..543]  eight dword per-stream counters
;       [544]       saved rax (rsp as it was at _avx2_shortcut)
;       [552]       saved edx
;-----------------------------------------------------------------------
sha256_multi_block_avx2:
	; rdi/rsi are callee-saved on Win64; park them in the home space that
	; the caller reserves above the return address.
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_sha256_multi_block_avx2:
	; Re-map the Win64 argument registers onto rdi/rsi/rdx.
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8



	; Secondary entry label. Code arriving here skips the argument shuffle
	; above, so rax is (re)loaded with rsp at this point.
	; NOTE(review): the jump site is outside this view; presumably a
	; dispatcher that already has the arguments in rdi/rsi/rdx -- confirm.
_avx2_shortcut:
	mov	rax,rsp

	; Six pushes = 48 bytes: rbx at rax-8 down to r15 at rax-48.
	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15

	; Reserve 168 bytes for xmm6-xmm15 (ten 16-byte slots plus 8 bytes of
	; padding). rsp is now rax-216, so the rsp-relative and rax-relative
	; stores below form one contiguous area: 80+rsp = rax-136, followed by
	; rax-120.
	lea	rsp,[((-168))+rsp]
	movaps	XMMWORD[rsp],xmm6
	movaps	XMMWORD[16+rsp],xmm7
	movaps	XMMWORD[32+rsp],xmm8
	movaps	XMMWORD[48+rsp],xmm9
	movaps	XMMWORD[64+rsp],xmm10
	movaps	XMMWORD[80+rsp],xmm11
	movaps	XMMWORD[(-120)+rax],xmm12
	movaps	XMMWORD[(-104)+rax],xmm13
	movaps	XMMWORD[(-88)+rax],xmm14
	movaps	XMMWORD[(-72)+rax],xmm15
	; Carve out 576 bytes of scratch, then round rsp down to a 256-byte
	; boundary. rax is stored at [544+rsp] so the pre-push stack pointer
	; survives the alignment.
	sub	rsp,576
	and	rsp,-256
	mov	QWORD[544+rsp],rax

$L$body_avx2:
	; rbp = K256+128: the round code reaches the table with displacements
	; -128..96 and advances rbp by 256 after every eight rounds.
	lea	rbp,[((K256+128))]
	; Bias rdi by 128 so that the eight 32-byte rows at offsets 0..224 are
	; addressed as (k-128)+rdi, i.e. displacements -128..96, which fit in a
	; signed byte.
	lea	rdi,[128+rdi]

; Outer-loop head: scan eight 16-byte descriptors at rsi (qword pointer at
; +0, dword count at +8), then load the working state.
;   - pointers go to r12, r13, r14, r15, r8, r9, r10, r11 (descriptor 0..7)
;   - counts are copied to the dword array at rbx = rsp+512
;   - edx ends up as the signed maximum of the eight counts (floor 0)
;   - a descriptor whose count is <= 0 has its pointer replaced by rbp
;     (K256+128). NOTE(review): presumably so that an idle lane still reads
;     from a valid address -- confirm.
; In each group "test ecx,ecx" sets the flags, the following mov leaves
; them untouched, and cmovle consumes them.
$L$oop_grande_avx2:
	; Save the incoming edx at [552+rsp]; edx is then reused as the running
	; maximum.
	mov	DWORD[552+rsp],edx
	xor	edx,edx
	lea	rbx,[512+rsp]
	; descriptor 0 -> r12, counter[0]
	mov	r12,QWORD[rsi]
	mov	ecx,DWORD[8+rsi]
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[rbx],ecx
	cmovle	r12,rbp
	; descriptor 1 -> r13, counter[1]
	mov	r13,QWORD[16+rsi]
	mov	ecx,DWORD[24+rsi]
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[4+rbx],ecx
	cmovle	r13,rbp
	; descriptor 2 -> r14, counter[2]
	mov	r14,QWORD[32+rsi]
	mov	ecx,DWORD[40+rsi]
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[8+rbx],ecx
	cmovle	r14,rbp
	; descriptor 3 -> r15, counter[3]
	mov	r15,QWORD[48+rsi]
	mov	ecx,DWORD[56+rsi]
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[12+rbx],ecx
	cmovle	r15,rbp
	; descriptor 4 -> r8, counter[4]
	mov	r8,QWORD[64+rsi]
	mov	ecx,DWORD[72+rsi]
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[16+rbx],ecx
	cmovle	r8,rbp
	; descriptor 5 -> r9, counter[5]
	mov	r9,QWORD[80+rsi]
	mov	ecx,DWORD[88+rsi]
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[20+rbx],ecx
	cmovle	r9,rbp
	; descriptor 6 -> r10, counter[6]
	mov	r10,QWORD[96+rsi]
	mov	ecx,DWORD[104+rsi]
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[24+rbx],ecx
	cmovle	r10,rbp
	; descriptor 7 -> r11, counter[7]
	mov	r11,QWORD[112+rsi]
	mov	ecx,DWORD[120+rsi]
	cmp	ecx,edx
	cmovg	edx,ecx
	test	ecx,ecx
	mov	DWORD[28+rbx],ecx
	cmovle	r11,rbp
	; Load the eight 32-byte state rows (offsets 0..224 from the unbiased
	; context pointer) into ymm8..ymm15. Interleaved with that, set the two
	; scratch bases used by the round code: rax = rsp+128 and
	; rbx = rsp+384. rbx no longer points at the counter array from here on.
	vmovdqu	ymm8,YMMWORD[((0-128))+rdi]
	lea	rax,[128+rsp]
	vmovdqu	ymm9,YMMWORD[((32-128))+rdi]
	lea	rbx,[((256+128))+rsp]
	vmovdqu	ymm10,YMMWORD[((64-128))+rdi]
	vmovdqu	ymm11,YMMWORD[((96-128))+rdi]
	vmovdqu	ymm12,YMMWORD[((128-128))+rdi]
	vmovdqu	ymm13,YMMWORD[((160-128))+rdi]
	vmovdqu	ymm14,YMMWORD[((192-128))+rdi]
	vmovdqu	ymm15,YMMWORD[((224-128))+rdi]
	; ymm6 = $L$pbswap, the vpshufb mask applied to every gathered input
	; word in the first sixteen rounds.
	vmovdqu	ymm6,YMMWORD[$L$pbswap]
	; Jump over the ALIGN 32 padding that precedes the loop label.
	jmp	NEAR $L$oop_avx2

ALIGN	32
; Per-block loop, round 0. The following fifteen rounds repeat this exact
; pattern with the input displacement stepped by 4, the scratch slot and
; the K256 displacement stepped by 32, and the ymm8..ymm15 roles rotated
; by one register per round.
; Register roles in this round, in SHA-256 round-function terms:
;   a=ymm8 b=ymm9 c=ymm10 d=ymm11 e=ymm12 f=ymm13 g=ymm14 h=ymm15
;   ymm5 = message word, then T1;  ymm7 = Sigma accumulator
;   ymm0..ymm4 = temporaries;      ymm6 = vpshufb mask ($L$pbswap)
; Each rotate is built from a right/left shift pair whose counts sum to 32;
; the two halves do not overlap, so xor-ing them in gives the rotation.
$L$oop_avx2:
	; ymm4 = c ^ b, consumed by the Maj computation further down.
	vpxor	ymm4,ymm10,ymm9
	; Gather one dword from each of the eight input pointers. After the
	; unpack/insert sequence the dword lanes of ymm5 are, low to high:
	; [r12], [r13], [r14], [r15], [r8], [r9], [r10], [r11].
	vmovd	xmm5,DWORD[r12]
	vmovd	xmm0,DWORD[r8]
	vmovd	xmm1,DWORD[r13]
	vmovd	xmm2,DWORD[r9]
	vpinsrd	xmm5,xmm5,DWORD[r14],1
	vpinsrd	xmm0,xmm0,DWORD[r10],1
	vpinsrd	xmm1,xmm1,DWORD[r15],1
	vpunpckldq	ymm5,ymm5,ymm1
	vpinsrd	xmm2,xmm2,DWORD[r11],1
	vpunpckldq	ymm0,ymm0,ymm2
	vinserti128	ymm5,ymm5,xmm0,1
	; Byte shuffle through the $L$pbswap mask.
	; NOTE(review): presumably a per-dword byte swap to big-endian; the
	; mask contents are not visible here -- confirm.
	vpshufb	ymm5,ymm5,ymm6
	; Sigma1(e) accumulates in ymm7: rotr 6 ^ rotr 11 ^ rotr 25.
	vpsrld	ymm7,ymm12,6
	vpslld	ymm2,ymm12,26
	; Keep the message word in scratch slot 0 (rsp+0).
	vmovdqu	YMMWORD[(0-128)+rax],ymm5
	; T1 starts as W + h ...
	vpaddd	ymm5,ymm5,ymm15

	vpsrld	ymm1,ymm12,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm12,21
	; ... plus this round's 32-byte K256 entry.
	vpaddd	ymm5,ymm5,YMMWORD[((-128))+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm12,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm12,7
	; Ch(e,f,g) = (~e & g) ^ (e & f), completed in ymm0 by the vpxor below.
	vpandn	ymm0,ymm12,ymm14
	vpand	ymm3,ymm12,ymm13

	vpxor	ymm7,ymm7,ymm1

	; h has been consumed, so ymm15 is reused: first as the rotr-2 partial
	; of Sigma0(a), finally as the new 'a'.
	vpsrld	ymm15,ymm8,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm8,30
	vpxor	ymm0,ymm0,ymm3
	; ymm3 = b ^ a
	vpxor	ymm3,ymm9,ymm8

	vpxor	ymm15,ymm15,ymm1
	; T1 += Sigma1(e)
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm8,13

	vpslld	ymm2,ymm8,19
	; T1 += Ch(e,f,g)
	vpaddd	ymm5,ymm5,ymm0
	; ymm4 = (c ^ b) & (b ^ a)
	vpand	ymm4,ymm4,ymm3

	; ymm7 now restarts as the Sigma0(a) accumulator: rotr 2 ^ rotr 13 ^ rotr 22.
	vpxor	ymm7,ymm15,ymm1

	vpsrld	ymm1,ymm8,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm8,10
	; Maj(a,b,c) = b ^ ((c ^ b) & (b ^ a))
	vpxor	ymm15,ymm9,ymm4
	; d += T1 (ymm11 is 'e' in the next round)
	vpaddd	ymm11,ymm11,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	; new a = Maj + T1 + Sigma0(a)
	vpaddd	ymm15,ymm15,ymm5
	vpaddd	ymm15,ymm15,ymm7
	vmovd	xmm5,DWORD[4+r12]
	vmovd	xmm0,DWORD[4+r8]
	vmovd	xmm1,DWORD[4+r13]
	vmovd	xmm2,DWORD[4+r9]
	vpinsrd	xmm5,xmm5,DWORD[4+r14],1
	vpinsrd	xmm0,xmm0,DWORD[4+r10],1
	vpinsrd	xmm1,xmm1,DWORD[4+r15],1
	vpunpckldq	ymm5,ymm5,ymm1
	vpinsrd	xmm2,xmm2,DWORD[4+r11],1
	vpunpckldq	ymm0,ymm0,ymm2
	vinserti128	ymm5,ymm5,xmm0,1
	vpshufb	ymm5,ymm5,ymm6
	vpsrld	ymm7,ymm11,6
	vpslld	ymm2,ymm11,26
	vmovdqu	YMMWORD[(32-128)+rax],ymm5
	vpaddd	ymm5,ymm5,ymm14

	vpsrld	ymm1,ymm11,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm11,21
	vpaddd	ymm5,ymm5,YMMWORD[((-96))+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm11,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm11,7
	vpandn	ymm0,ymm11,ymm13
	vpand	ymm4,ymm11,ymm12

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm14,ymm15,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm15,30
	vpxor	ymm0,ymm0,ymm4
	vpxor	ymm4,ymm8,ymm15

	vpxor	ymm14,ymm14,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm15,13

	vpslld	ymm2,ymm15,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm3,ymm3,ymm4

	vpxor	ymm7,ymm14,ymm1

	vpsrld	ymm1,ymm15,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm15,10
	vpxor	ymm14,ymm8,ymm3
	vpaddd	ymm10,ymm10,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm14,ymm14,ymm5
	vpaddd	ymm14,ymm14,ymm7
	vmovd	xmm5,DWORD[8+r12]
	vmovd	xmm0,DWORD[8+r8]
	vmovd	xmm1,DWORD[8+r13]
	vmovd	xmm2,DWORD[8+r9]
	vpinsrd	xmm5,xmm5,DWORD[8+r14],1
	vpinsrd	xmm0,xmm0,DWORD[8+r10],1
	vpinsrd	xmm1,xmm1,DWORD[8+r15],1
	vpunpckldq	ymm5,ymm5,ymm1
	vpinsrd	xmm2,xmm2,DWORD[8+r11],1
	vpunpckldq	ymm0,ymm0,ymm2
	vinserti128	ymm5,ymm5,xmm0,1
	vpshufb	ymm5,ymm5,ymm6
	vpsrld	ymm7,ymm10,6
	vpslld	ymm2,ymm10,26
	vmovdqu	YMMWORD[(64-128)+rax],ymm5
	vpaddd	ymm5,ymm5,ymm13

	vpsrld	ymm1,ymm10,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm10,21
	vpaddd	ymm5,ymm5,YMMWORD[((-64))+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm10,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm10,7
	vpandn	ymm0,ymm10,ymm12
	vpand	ymm3,ymm10,ymm11

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm13,ymm14,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm14,30
	vpxor	ymm0,ymm0,ymm3
	vpxor	ymm3,ymm15,ymm14

	vpxor	ymm13,ymm13,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm14,13

	vpslld	ymm2,ymm14,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm4,ymm4,ymm3

	vpxor	ymm7,ymm13,ymm1

	vpsrld	ymm1,ymm14,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm14,10
	vpxor	ymm13,ymm15,ymm4
	vpaddd	ymm9,ymm9,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm13,ymm13,ymm5
	vpaddd	ymm13,ymm13,ymm7
	vmovd	xmm5,DWORD[12+r12]
	vmovd	xmm0,DWORD[12+r8]
	vmovd	xmm1,DWORD[12+r13]
	vmovd	xmm2,DWORD[12+r9]
	vpinsrd	xmm5,xmm5,DWORD[12+r14],1
	vpinsrd	xmm0,xmm0,DWORD[12+r10],1
	vpinsrd	xmm1,xmm1,DWORD[12+r15],1
	vpunpckldq	ymm5,ymm5,ymm1
	vpinsrd	xmm2,xmm2,DWORD[12+r11],1
	vpunpckldq	ymm0,ymm0,ymm2
	vinserti128	ymm5,ymm5,xmm0,1
	vpshufb	ymm5,ymm5,ymm6
	vpsrld	ymm7,ymm9,6
	vpslld	ymm2,ymm9,26
	vmovdqu	YMMWORD[(96-128)+rax],ymm5
	vpaddd	ymm5,ymm5,ymm12

	vpsrld	ymm1,ymm9,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm9,21
	vpaddd	ymm5,ymm5,YMMWORD[((-32))+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm9,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm9,7
	vpandn	ymm0,ymm9,ymm11
	vpand	ymm4,ymm9,ymm10

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm12,ymm13,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm13,30
	vpxor	ymm0,ymm0,ymm4
	vpxor	ymm4,ymm14,ymm13

	vpxor	ymm12,ymm12,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm13,13

	vpslld	ymm2,ymm13,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm3,ymm3,ymm4

	vpxor	ymm7,ymm12,ymm1

	vpsrld	ymm1,ymm13,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm13,10
	vpxor	ymm12,ymm14,ymm3
	vpaddd	ymm8,ymm8,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm12,ymm12,ymm5
	vpaddd	ymm12,ymm12,ymm7
	vmovd	xmm5,DWORD[16+r12]
	vmovd	xmm0,DWORD[16+r8]
	vmovd	xmm1,DWORD[16+r13]
	vmovd	xmm2,DWORD[16+r9]
	vpinsrd	xmm5,xmm5,DWORD[16+r14],1
	vpinsrd	xmm0,xmm0,DWORD[16+r10],1
	vpinsrd	xmm1,xmm1,DWORD[16+r15],1
	vpunpckldq	ymm5,ymm5,ymm1
	vpinsrd	xmm2,xmm2,DWORD[16+r11],1
	vpunpckldq	ymm0,ymm0,ymm2
	vinserti128	ymm5,ymm5,xmm0,1
	vpshufb	ymm5,ymm5,ymm6
	vpsrld	ymm7,ymm8,6
	vpslld	ymm2,ymm8,26
	vmovdqu	YMMWORD[(128-128)+rax],ymm5
	vpaddd	ymm5,ymm5,ymm11

	vpsrld	ymm1,ymm8,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm8,21
	vpaddd	ymm5,ymm5,YMMWORD[rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm8,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm8,7
	vpandn	ymm0,ymm8,ymm10
	vpand	ymm3,ymm8,ymm9

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm11,ymm12,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm12,30
	vpxor	ymm0,ymm0,ymm3
	vpxor	ymm3,ymm13,ymm12

	vpxor	ymm11,ymm11,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm12,13

	vpslld	ymm2,ymm12,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm4,ymm4,ymm3

	vpxor	ymm7,ymm11,ymm1

	vpsrld	ymm1,ymm12,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm12,10
	vpxor	ymm11,ymm13,ymm4
	vpaddd	ymm15,ymm15,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm11,ymm11,ymm5
	vpaddd	ymm11,ymm11,ymm7
	vmovd	xmm5,DWORD[20+r12]
	vmovd	xmm0,DWORD[20+r8]
	vmovd	xmm1,DWORD[20+r13]
	vmovd	xmm2,DWORD[20+r9]
	vpinsrd	xmm5,xmm5,DWORD[20+r14],1
	vpinsrd	xmm0,xmm0,DWORD[20+r10],1
	vpinsrd	xmm1,xmm1,DWORD[20+r15],1
	vpunpckldq	ymm5,ymm5,ymm1
	vpinsrd	xmm2,xmm2,DWORD[20+r11],1
	vpunpckldq	ymm0,ymm0,ymm2
	vinserti128	ymm5,ymm5,xmm0,1
	vpshufb	ymm5,ymm5,ymm6
	vpsrld	ymm7,ymm15,6
	vpslld	ymm2,ymm15,26
	vmovdqu	YMMWORD[(160-128)+rax],ymm5
	vpaddd	ymm5,ymm5,ymm10

	vpsrld	ymm1,ymm15,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm15,21
	vpaddd	ymm5,ymm5,YMMWORD[32+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm15,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm15,7
	vpandn	ymm0,ymm15,ymm9
	vpand	ymm4,ymm15,ymm8

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm10,ymm11,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm11,30
	vpxor	ymm0,ymm0,ymm4
	vpxor	ymm4,ymm12,ymm11

	vpxor	ymm10,ymm10,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm11,13

	vpslld	ymm2,ymm11,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm3,ymm3,ymm4

	vpxor	ymm7,ymm10,ymm1

	vpsrld	ymm1,ymm11,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm11,10
	vpxor	ymm10,ymm12,ymm3
	vpaddd	ymm14,ymm14,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm10,ymm10,ymm5
	vpaddd	ymm10,ymm10,ymm7
	vmovd	xmm5,DWORD[24+r12]
	vmovd	xmm0,DWORD[24+r8]
	vmovd	xmm1,DWORD[24+r13]
	vmovd	xmm2,DWORD[24+r9]
	vpinsrd	xmm5,xmm5,DWORD[24+r14],1
	vpinsrd	xmm0,xmm0,DWORD[24+r10],1
	vpinsrd	xmm1,xmm1,DWORD[24+r15],1
	vpunpckldq	ymm5,ymm5,ymm1
	vpinsrd	xmm2,xmm2,DWORD[24+r11],1
	vpunpckldq	ymm0,ymm0,ymm2
	vinserti128	ymm5,ymm5,xmm0,1
	vpshufb	ymm5,ymm5,ymm6
	vpsrld	ymm7,ymm14,6
	vpslld	ymm2,ymm14,26
	vmovdqu	YMMWORD[(192-128)+rax],ymm5
	vpaddd	ymm5,ymm5,ymm9

	vpsrld	ymm1,ymm14,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm14,21
	vpaddd	ymm5,ymm5,YMMWORD[64+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm14,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm14,7
	vpandn	ymm0,ymm14,ymm8
	vpand	ymm3,ymm14,ymm15

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm9,ymm10,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm10,30
	vpxor	ymm0,ymm0,ymm3
	vpxor	ymm3,ymm11,ymm10

	vpxor	ymm9,ymm9,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm10,13

	vpslld	ymm2,ymm10,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm4,ymm4,ymm3

	vpxor	ymm7,ymm9,ymm1

	vpsrld	ymm1,ymm10,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm10,10
	vpxor	ymm9,ymm11,ymm4
	vpaddd	ymm13,ymm13,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm9,ymm9,ymm5
	vpaddd	ymm9,ymm9,ymm7
	vmovd	xmm5,DWORD[28+r12]
	vmovd	xmm0,DWORD[28+r8]
	vmovd	xmm1,DWORD[28+r13]
	vmovd	xmm2,DWORD[28+r9]
	vpinsrd	xmm5,xmm5,DWORD[28+r14],1
	vpinsrd	xmm0,xmm0,DWORD[28+r10],1
	vpinsrd	xmm1,xmm1,DWORD[28+r15],1
	vpunpckldq	ymm5,ymm5,ymm1
	vpinsrd	xmm2,xmm2,DWORD[28+r11],1
	vpunpckldq	ymm0,ymm0,ymm2
	vinserti128	ymm5,ymm5,xmm0,1
	vpshufb	ymm5,ymm5,ymm6
	vpsrld	ymm7,ymm13,6
	vpslld	ymm2,ymm13,26
	vmovdqu	YMMWORD[(224-128)+rax],ymm5
	vpaddd	ymm5,ymm5,ymm8

	vpsrld	ymm1,ymm13,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm13,21
	vpaddd	ymm5,ymm5,YMMWORD[96+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm13,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm13,7
	vpandn	ymm0,ymm13,ymm15
	vpand	ymm4,ymm13,ymm14

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm8,ymm9,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm9,30
	vpxor	ymm0,ymm0,ymm4
	vpxor	ymm4,ymm10,ymm9

	vpxor	ymm8,ymm8,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm9,13

	vpslld	ymm2,ymm9,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm3,ymm3,ymm4

	vpxor	ymm7,ymm8,ymm1

	vpsrld	ymm1,ymm9,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm9,10
	vpxor	ymm8,ymm10,ymm3
	vpaddd	ymm12,ymm12,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm8,ymm8,ymm5
	vpaddd	ymm8,ymm8,ymm7
	add	rbp,256
	vmovd	xmm5,DWORD[32+r12]
	vmovd	xmm0,DWORD[32+r8]
	vmovd	xmm1,DWORD[32+r13]
	vmovd	xmm2,DWORD[32+r9]
	vpinsrd	xmm5,xmm5,DWORD[32+r14],1
	vpinsrd	xmm0,xmm0,DWORD[32+r10],1
	vpinsrd	xmm1,xmm1,DWORD[32+r15],1
	vpunpckldq	ymm5,ymm5,ymm1
	vpinsrd	xmm2,xmm2,DWORD[32+r11],1
	vpunpckldq	ymm0,ymm0,ymm2
	vinserti128	ymm5,ymm5,xmm0,1
	vpshufb	ymm5,ymm5,ymm6
	vpsrld	ymm7,ymm12,6
	vpslld	ymm2,ymm12,26
	vmovdqu	YMMWORD[(256-256-128)+rbx],ymm5
	vpaddd	ymm5,ymm5,ymm15

	vpsrld	ymm1,ymm12,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm12,21
	vpaddd	ymm5,ymm5,YMMWORD[((-128))+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm12,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm12,7
	vpandn	ymm0,ymm12,ymm14
	vpand	ymm3,ymm12,ymm13

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm15,ymm8,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm8,30
	vpxor	ymm0,ymm0,ymm3
	vpxor	ymm3,ymm9,ymm8

	vpxor	ymm15,ymm15,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm8,13

	vpslld	ymm2,ymm8,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm4,ymm4,ymm3

	vpxor	ymm7,ymm15,ymm1

	vpsrld	ymm1,ymm8,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm8,10
	vpxor	ymm15,ymm9,ymm4
	vpaddd	ymm11,ymm11,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm15,ymm15,ymm5
	vpaddd	ymm15,ymm15,ymm7
	vmovd	xmm5,DWORD[36+r12]
	vmovd	xmm0,DWORD[36+r8]
	vmovd	xmm1,DWORD[36+r13]
	vmovd	xmm2,DWORD[36+r9]
	vpinsrd	xmm5,xmm5,DWORD[36+r14],1
	vpinsrd	xmm0,xmm0,DWORD[36+r10],1
	vpinsrd	xmm1,xmm1,DWORD[36+r15],1
	vpunpckldq	ymm5,ymm5,ymm1
	vpinsrd	xmm2,xmm2,DWORD[36+r11],1
	vpunpckldq	ymm0,ymm0,ymm2
	vinserti128	ymm5,ymm5,xmm0,1
	vpshufb	ymm5,ymm5,ymm6
	vpsrld	ymm7,ymm11,6
	vpslld	ymm2,ymm11,26
	vmovdqu	YMMWORD[(288-256-128)+rbx],ymm5
	vpaddd	ymm5,ymm5,ymm14

	vpsrld	ymm1,ymm11,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm11,21
	vpaddd	ymm5,ymm5,YMMWORD[((-96))+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm11,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm11,7
	vpandn	ymm0,ymm11,ymm13
	vpand	ymm4,ymm11,ymm12

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm14,ymm15,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm15,30
	vpxor	ymm0,ymm0,ymm4
	vpxor	ymm4,ymm8,ymm15

	vpxor	ymm14,ymm14,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm15,13

	vpslld	ymm2,ymm15,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm3,ymm3,ymm4

	vpxor	ymm7,ymm14,ymm1

	vpsrld	ymm1,ymm15,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm15,10
	vpxor	ymm14,ymm8,ymm3
	vpaddd	ymm10,ymm10,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm14,ymm14,ymm5
	vpaddd	ymm14,ymm14,ymm7
	vmovd	xmm5,DWORD[40+r12]
	vmovd	xmm0,DWORD[40+r8]
	vmovd	xmm1,DWORD[40+r13]
	vmovd	xmm2,DWORD[40+r9]
	vpinsrd	xmm5,xmm5,DWORD[40+r14],1
	vpinsrd	xmm0,xmm0,DWORD[40+r10],1
	vpinsrd	xmm1,xmm1,DWORD[40+r15],1
	vpunpckldq	ymm5,ymm5,ymm1
	vpinsrd	xmm2,xmm2,DWORD[40+r11],1
	vpunpckldq	ymm0,ymm0,ymm2
	vinserti128	ymm5,ymm5,xmm0,1
	vpshufb	ymm5,ymm5,ymm6
	vpsrld	ymm7,ymm10,6
	vpslld	ymm2,ymm10,26
	vmovdqu	YMMWORD[(320-256-128)+rbx],ymm5
	vpaddd	ymm5,ymm5,ymm13

	vpsrld	ymm1,ymm10,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm10,21
	vpaddd	ymm5,ymm5,YMMWORD[((-64))+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm10,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm10,7
	vpandn	ymm0,ymm10,ymm12
	vpand	ymm3,ymm10,ymm11

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm13,ymm14,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm14,30
	vpxor	ymm0,ymm0,ymm3
	vpxor	ymm3,ymm15,ymm14

	vpxor	ymm13,ymm13,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm14,13

	vpslld	ymm2,ymm14,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm4,ymm4,ymm3

	vpxor	ymm7,ymm13,ymm1

	vpsrld	ymm1,ymm14,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm14,10
	vpxor	ymm13,ymm15,ymm4
	vpaddd	ymm9,ymm9,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm13,ymm13,ymm5
	vpaddd	ymm13,ymm13,ymm7
	vmovd	xmm5,DWORD[44+r12]
	vmovd	xmm0,DWORD[44+r8]
	vmovd	xmm1,DWORD[44+r13]
	vmovd	xmm2,DWORD[44+r9]
	vpinsrd	xmm5,xmm5,DWORD[44+r14],1
	vpinsrd	xmm0,xmm0,DWORD[44+r10],1
	vpinsrd	xmm1,xmm1,DWORD[44+r15],1
	vpunpckldq	ymm5,ymm5,ymm1
	vpinsrd	xmm2,xmm2,DWORD[44+r11],1
	vpunpckldq	ymm0,ymm0,ymm2
	vinserti128	ymm5,ymm5,xmm0,1
	vpshufb	ymm5,ymm5,ymm6
	vpsrld	ymm7,ymm9,6
	vpslld	ymm2,ymm9,26
	vmovdqu	YMMWORD[(352-256-128)+rbx],ymm5
	vpaddd	ymm5,ymm5,ymm12

	vpsrld	ymm1,ymm9,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm9,21
	vpaddd	ymm5,ymm5,YMMWORD[((-32))+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm9,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm9,7
	vpandn	ymm0,ymm9,ymm11
	vpand	ymm4,ymm9,ymm10

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm12,ymm13,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm13,30
	vpxor	ymm0,ymm0,ymm4
	vpxor	ymm4,ymm14,ymm13

	vpxor	ymm12,ymm12,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm13,13

	vpslld	ymm2,ymm13,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm3,ymm3,ymm4

	vpxor	ymm7,ymm12,ymm1

	vpsrld	ymm1,ymm13,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm13,10
	vpxor	ymm12,ymm14,ymm3
	vpaddd	ymm8,ymm8,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm12,ymm12,ymm5
	vpaddd	ymm12,ymm12,ymm7
	vmovd	xmm5,DWORD[48+r12]
	vmovd	xmm0,DWORD[48+r8]
	vmovd	xmm1,DWORD[48+r13]
	vmovd	xmm2,DWORD[48+r9]
	vpinsrd	xmm5,xmm5,DWORD[48+r14],1
	vpinsrd	xmm0,xmm0,DWORD[48+r10],1
	vpinsrd	xmm1,xmm1,DWORD[48+r15],1
	vpunpckldq	ymm5,ymm5,ymm1
	vpinsrd	xmm2,xmm2,DWORD[48+r11],1
	vpunpckldq	ymm0,ymm0,ymm2
	vinserti128	ymm5,ymm5,xmm0,1
	vpshufb	ymm5,ymm5,ymm6
	vpsrld	ymm7,ymm8,6
	vpslld	ymm2,ymm8,26
	vmovdqu	YMMWORD[(384-256-128)+rbx],ymm5
	vpaddd	ymm5,ymm5,ymm11

	vpsrld	ymm1,ymm8,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm8,21
	vpaddd	ymm5,ymm5,YMMWORD[rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm8,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm8,7
	vpandn	ymm0,ymm8,ymm10
	vpand	ymm3,ymm8,ymm9

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm11,ymm12,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm12,30
	vpxor	ymm0,ymm0,ymm3
	vpxor	ymm3,ymm13,ymm12

	vpxor	ymm11,ymm11,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm12,13

	vpslld	ymm2,ymm12,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm4,ymm4,ymm3

	vpxor	ymm7,ymm11,ymm1

	vpsrld	ymm1,ymm12,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm12,10
	vpxor	ymm11,ymm13,ymm4
	vpaddd	ymm15,ymm15,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm11,ymm11,ymm5
	vpaddd	ymm11,ymm11,ymm7
	vmovd	xmm5,DWORD[52+r12]
	vmovd	xmm0,DWORD[52+r8]
	vmovd	xmm1,DWORD[52+r13]
	vmovd	xmm2,DWORD[52+r9]
	vpinsrd	xmm5,xmm5,DWORD[52+r14],1
	vpinsrd	xmm0,xmm0,DWORD[52+r10],1
	vpinsrd	xmm1,xmm1,DWORD[52+r15],1
	vpunpckldq	ymm5,ymm5,ymm1
	vpinsrd	xmm2,xmm2,DWORD[52+r11],1
	vpunpckldq	ymm0,ymm0,ymm2
	vinserti128	ymm5,ymm5,xmm0,1
	vpshufb	ymm5,ymm5,ymm6
	vpsrld	ymm7,ymm15,6
	vpslld	ymm2,ymm15,26
	vmovdqu	YMMWORD[(416-256-128)+rbx],ymm5
	vpaddd	ymm5,ymm5,ymm10

	vpsrld	ymm1,ymm15,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm15,21
	vpaddd	ymm5,ymm5,YMMWORD[32+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm15,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm15,7
	vpandn	ymm0,ymm15,ymm9
	vpand	ymm4,ymm15,ymm8

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm10,ymm11,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm11,30
	vpxor	ymm0,ymm0,ymm4
	vpxor	ymm4,ymm12,ymm11

	vpxor	ymm10,ymm10,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm11,13

	vpslld	ymm2,ymm11,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm3,ymm3,ymm4

	vpxor	ymm7,ymm10,ymm1

	vpsrld	ymm1,ymm11,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm11,10
	vpxor	ymm10,ymm12,ymm3
	vpaddd	ymm14,ymm14,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm10,ymm10,ymm5
	vpaddd	ymm10,ymm10,ymm7
	vmovd	xmm5,DWORD[56+r12]
	vmovd	xmm0,DWORD[56+r8]
	vmovd	xmm1,DWORD[56+r13]
	vmovd	xmm2,DWORD[56+r9]
	vpinsrd	xmm5,xmm5,DWORD[56+r14],1
	vpinsrd	xmm0,xmm0,DWORD[56+r10],1
	vpinsrd	xmm1,xmm1,DWORD[56+r15],1
	vpunpckldq	ymm5,ymm5,ymm1
	vpinsrd	xmm2,xmm2,DWORD[56+r11],1
	vpunpckldq	ymm0,ymm0,ymm2
	vinserti128	ymm5,ymm5,xmm0,1
	vpshufb	ymm5,ymm5,ymm6
	vpsrld	ymm7,ymm14,6
	vpslld	ymm2,ymm14,26
	vmovdqu	YMMWORD[(448-256-128)+rbx],ymm5
	vpaddd	ymm5,ymm5,ymm9

	vpsrld	ymm1,ymm14,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm14,21
	vpaddd	ymm5,ymm5,YMMWORD[64+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm14,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm14,7
	vpandn	ymm0,ymm14,ymm8
	vpand	ymm3,ymm14,ymm15

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm9,ymm10,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm10,30
	vpxor	ymm0,ymm0,ymm3
	vpxor	ymm3,ymm11,ymm10

	vpxor	ymm9,ymm9,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm10,13

	vpslld	ymm2,ymm10,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm4,ymm4,ymm3

	vpxor	ymm7,ymm9,ymm1

	vpsrld	ymm1,ymm10,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm10,10
	vpxor	ymm9,ymm11,ymm4
	vpaddd	ymm13,ymm13,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm9,ymm9,ymm5
	vpaddd	ymm9,ymm9,ymm7
	vmovd	xmm5,DWORD[60+r12]
	lea	r12,[64+r12]
	vmovd	xmm0,DWORD[60+r8]
	lea	r8,[64+r8]
	vmovd	xmm1,DWORD[60+r13]
	lea	r13,[64+r13]
	vmovd	xmm2,DWORD[60+r9]
	lea	r9,[64+r9]
	vpinsrd	xmm5,xmm5,DWORD[60+r14],1
	lea	r14,[64+r14]
	vpinsrd	xmm0,xmm0,DWORD[60+r10],1
	lea	r10,[64+r10]
	vpinsrd	xmm1,xmm1,DWORD[60+r15],1
	lea	r15,[64+r15]
	vpunpckldq	ymm5,ymm5,ymm1
	vpinsrd	xmm2,xmm2,DWORD[60+r11],1
	lea	r11,[64+r11]
	vpunpckldq	ymm0,ymm0,ymm2
	vinserti128	ymm5,ymm5,xmm0,1
	vpshufb	ymm5,ymm5,ymm6
	vpsrld	ymm7,ymm13,6
	vpslld	ymm2,ymm13,26
	vmovdqu	YMMWORD[(480-256-128)+rbx],ymm5
	vpaddd	ymm5,ymm5,ymm8

	vpsrld	ymm1,ymm13,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm13,21
	vpaddd	ymm5,ymm5,YMMWORD[96+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm13,25
	vpxor	ymm7,ymm7,ymm2
	prefetcht0	[63+r12]
	vpslld	ymm2,ymm13,7
	vpandn	ymm0,ymm13,ymm15
	vpand	ymm4,ymm13,ymm14
	prefetcht0	[63+r13]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm8,ymm9,2
	vpxor	ymm7,ymm7,ymm2
	prefetcht0	[63+r14]
	vpslld	ymm1,ymm9,30
	vpxor	ymm0,ymm0,ymm4
	vpxor	ymm4,ymm10,ymm9
	prefetcht0	[63+r15]
	vpxor	ymm8,ymm8,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm9,13
	prefetcht0	[63+r8]
	vpslld	ymm2,ymm9,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm3,ymm3,ymm4
	prefetcht0	[63+r9]
	vpxor	ymm7,ymm8,ymm1

	vpsrld	ymm1,ymm9,22
	vpxor	ymm7,ymm7,ymm2
	prefetcht0	[63+r10]
	vpslld	ymm2,ymm9,10
	vpxor	ymm8,ymm10,ymm3
	vpaddd	ymm12,ymm12,ymm5
	prefetcht0	[63+r11]
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm8,ymm8,ymm5
	vpaddd	ymm8,ymm8,ymm7
	add	rbp,256
	vmovdqu	ymm5,YMMWORD[((0-128))+rax]
	mov	ecx,3
	jmp	NEAR $L$oop_16_xx_avx2
ALIGN	32
$L$oop_16_xx_avx2:
	vmovdqu	ymm6,YMMWORD[((32-128))+rax]
	vpaddd	ymm5,ymm5,YMMWORD[((288-256-128))+rbx]

	vpsrld	ymm7,ymm6,3
	vpsrld	ymm1,ymm6,7
	vpslld	ymm2,ymm6,25
	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm6,18
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm6,14
	vmovdqu	ymm0,YMMWORD[((448-256-128))+rbx]
	vpsrld	ymm3,ymm0,10

	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm0,17
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,15
	vpaddd	ymm5,ymm5,ymm7
	vpxor	ymm7,ymm3,ymm1
	vpsrld	ymm1,ymm0,19
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,13
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2
	vpaddd	ymm5,ymm5,ymm7
	vpsrld	ymm7,ymm12,6
	vpslld	ymm2,ymm12,26
	vmovdqu	YMMWORD[(0-128)+rax],ymm5
	vpaddd	ymm5,ymm5,ymm15

	vpsrld	ymm1,ymm12,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm12,21
	vpaddd	ymm5,ymm5,YMMWORD[((-128))+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm12,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm12,7
	vpandn	ymm0,ymm12,ymm14
	vpand	ymm3,ymm12,ymm13

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm15,ymm8,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm8,30
	vpxor	ymm0,ymm0,ymm3
	vpxor	ymm3,ymm9,ymm8

	vpxor	ymm15,ymm15,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm8,13

	vpslld	ymm2,ymm8,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm4,ymm4,ymm3

	vpxor	ymm7,ymm15,ymm1

	vpsrld	ymm1,ymm8,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm8,10
	vpxor	ymm15,ymm9,ymm4
	vpaddd	ymm11,ymm11,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm15,ymm15,ymm5
	vpaddd	ymm15,ymm15,ymm7
	vmovdqu	ymm5,YMMWORD[((64-128))+rax]
	vpaddd	ymm6,ymm6,YMMWORD[((320-256-128))+rbx]

	vpsrld	ymm7,ymm5,3
	vpsrld	ymm1,ymm5,7
	vpslld	ymm2,ymm5,25
	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm5,18
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm5,14
	vmovdqu	ymm0,YMMWORD[((480-256-128))+rbx]
	vpsrld	ymm4,ymm0,10

	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm0,17
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,15
	vpaddd	ymm6,ymm6,ymm7
	vpxor	ymm7,ymm4,ymm1
	vpsrld	ymm1,ymm0,19
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,13
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2
	vpaddd	ymm6,ymm6,ymm7
	vpsrld	ymm7,ymm11,6
	vpslld	ymm2,ymm11,26
	vmovdqu	YMMWORD[(32-128)+rax],ymm6
	vpaddd	ymm6,ymm6,ymm14

	vpsrld	ymm1,ymm11,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm11,21
	vpaddd	ymm6,ymm6,YMMWORD[((-96))+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm11,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm11,7
	vpandn	ymm0,ymm11,ymm13
	vpand	ymm4,ymm11,ymm12

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm14,ymm15,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm15,30
	vpxor	ymm0,ymm0,ymm4
	vpxor	ymm4,ymm8,ymm15

	vpxor	ymm14,ymm14,ymm1
	vpaddd	ymm6,ymm6,ymm7

	vpsrld	ymm1,ymm15,13

	vpslld	ymm2,ymm15,19
	vpaddd	ymm6,ymm6,ymm0
	vpand	ymm3,ymm3,ymm4

	vpxor	ymm7,ymm14,ymm1

	vpsrld	ymm1,ymm15,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm15,10
	vpxor	ymm14,ymm8,ymm3
	vpaddd	ymm10,ymm10,ymm6

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm14,ymm14,ymm6
	vpaddd	ymm14,ymm14,ymm7
	vmovdqu	ymm6,YMMWORD[((96-128))+rax]
	vpaddd	ymm5,ymm5,YMMWORD[((352-256-128))+rbx]

	vpsrld	ymm7,ymm6,3
	vpsrld	ymm1,ymm6,7
	vpslld	ymm2,ymm6,25
	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm6,18
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm6,14
	vmovdqu	ymm0,YMMWORD[((0-128))+rax]
	vpsrld	ymm3,ymm0,10

	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm0,17
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,15
	vpaddd	ymm5,ymm5,ymm7
	vpxor	ymm7,ymm3,ymm1
	vpsrld	ymm1,ymm0,19
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,13
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2
	vpaddd	ymm5,ymm5,ymm7
	vpsrld	ymm7,ymm10,6
	vpslld	ymm2,ymm10,26
	vmovdqu	YMMWORD[(64-128)+rax],ymm5
	vpaddd	ymm5,ymm5,ymm13

	vpsrld	ymm1,ymm10,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm10,21
	vpaddd	ymm5,ymm5,YMMWORD[((-64))+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm10,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm10,7
	vpandn	ymm0,ymm10,ymm12
	vpand	ymm3,ymm10,ymm11

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm13,ymm14,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm14,30
	vpxor	ymm0,ymm0,ymm3
	vpxor	ymm3,ymm15,ymm14

	vpxor	ymm13,ymm13,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm14,13

	vpslld	ymm2,ymm14,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm4,ymm4,ymm3

	vpxor	ymm7,ymm13,ymm1

	vpsrld	ymm1,ymm14,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm14,10
	vpxor	ymm13,ymm15,ymm4
	vpaddd	ymm9,ymm9,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm13,ymm13,ymm5
	vpaddd	ymm13,ymm13,ymm7
	vmovdqu	ymm5,YMMWORD[((128-128))+rax]
	vpaddd	ymm6,ymm6,YMMWORD[((384-256-128))+rbx]

	vpsrld	ymm7,ymm5,3
	vpsrld	ymm1,ymm5,7
	vpslld	ymm2,ymm5,25
	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm5,18
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm5,14
	vmovdqu	ymm0,YMMWORD[((32-128))+rax]
	vpsrld	ymm4,ymm0,10

	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm0,17
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,15
	vpaddd	ymm6,ymm6,ymm7
	vpxor	ymm7,ymm4,ymm1
	vpsrld	ymm1,ymm0,19
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,13
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2
	vpaddd	ymm6,ymm6,ymm7
	vpsrld	ymm7,ymm9,6
	vpslld	ymm2,ymm9,26
	vmovdqu	YMMWORD[(96-128)+rax],ymm6
	vpaddd	ymm6,ymm6,ymm12

	vpsrld	ymm1,ymm9,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm9,21
	vpaddd	ymm6,ymm6,YMMWORD[((-32))+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm9,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm9,7
	vpandn	ymm0,ymm9,ymm11
	vpand	ymm4,ymm9,ymm10

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm12,ymm13,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm13,30
	vpxor	ymm0,ymm0,ymm4
	vpxor	ymm4,ymm14,ymm13

	vpxor	ymm12,ymm12,ymm1
	vpaddd	ymm6,ymm6,ymm7

	vpsrld	ymm1,ymm13,13

	vpslld	ymm2,ymm13,19
	vpaddd	ymm6,ymm6,ymm0
	vpand	ymm3,ymm3,ymm4

	vpxor	ymm7,ymm12,ymm1

	vpsrld	ymm1,ymm13,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm13,10
	vpxor	ymm12,ymm14,ymm3
	vpaddd	ymm8,ymm8,ymm6

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm12,ymm12,ymm6
	vpaddd	ymm12,ymm12,ymm7
	vmovdqu	ymm6,YMMWORD[((160-128))+rax]
	vpaddd	ymm5,ymm5,YMMWORD[((416-256-128))+rbx]

	vpsrld	ymm7,ymm6,3
	vpsrld	ymm1,ymm6,7
	vpslld	ymm2,ymm6,25
	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm6,18
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm6,14
	vmovdqu	ymm0,YMMWORD[((64-128))+rax]
	vpsrld	ymm3,ymm0,10

	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm0,17
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,15
	vpaddd	ymm5,ymm5,ymm7
	vpxor	ymm7,ymm3,ymm1
	vpsrld	ymm1,ymm0,19
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,13
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2
	vpaddd	ymm5,ymm5,ymm7
	vpsrld	ymm7,ymm8,6
	vpslld	ymm2,ymm8,26
	vmovdqu	YMMWORD[(128-128)+rax],ymm5
	vpaddd	ymm5,ymm5,ymm11

	vpsrld	ymm1,ymm8,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm8,21
	vpaddd	ymm5,ymm5,YMMWORD[rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm8,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm8,7
	vpandn	ymm0,ymm8,ymm10
	vpand	ymm3,ymm8,ymm9

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm11,ymm12,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm12,30
	vpxor	ymm0,ymm0,ymm3
	vpxor	ymm3,ymm13,ymm12

	vpxor	ymm11,ymm11,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm12,13

	vpslld	ymm2,ymm12,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm4,ymm4,ymm3

	vpxor	ymm7,ymm11,ymm1

	vpsrld	ymm1,ymm12,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm12,10
	vpxor	ymm11,ymm13,ymm4
	vpaddd	ymm15,ymm15,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm11,ymm11,ymm5
	vpaddd	ymm11,ymm11,ymm7
	vmovdqu	ymm5,YMMWORD[((192-128))+rax]
	vpaddd	ymm6,ymm6,YMMWORD[((448-256-128))+rbx]

	vpsrld	ymm7,ymm5,3
	vpsrld	ymm1,ymm5,7
	vpslld	ymm2,ymm5,25
	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm5,18
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm5,14
	vmovdqu	ymm0,YMMWORD[((96-128))+rax]
	vpsrld	ymm4,ymm0,10

	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm0,17
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,15
	vpaddd	ymm6,ymm6,ymm7
	vpxor	ymm7,ymm4,ymm1
	vpsrld	ymm1,ymm0,19
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,13
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2
	vpaddd	ymm6,ymm6,ymm7
	vpsrld	ymm7,ymm15,6
	vpslld	ymm2,ymm15,26
	vmovdqu	YMMWORD[(160-128)+rax],ymm6
	vpaddd	ymm6,ymm6,ymm10

	vpsrld	ymm1,ymm15,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm15,21
	vpaddd	ymm6,ymm6,YMMWORD[32+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm15,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm15,7
	vpandn	ymm0,ymm15,ymm9
	vpand	ymm4,ymm15,ymm8

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm10,ymm11,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm11,30
	vpxor	ymm0,ymm0,ymm4
	vpxor	ymm4,ymm12,ymm11

	vpxor	ymm10,ymm10,ymm1
	vpaddd	ymm6,ymm6,ymm7

	vpsrld	ymm1,ymm11,13

	vpslld	ymm2,ymm11,19
	vpaddd	ymm6,ymm6,ymm0
	vpand	ymm3,ymm3,ymm4

	vpxor	ymm7,ymm10,ymm1

	vpsrld	ymm1,ymm11,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm11,10
	vpxor	ymm10,ymm12,ymm3
	vpaddd	ymm14,ymm14,ymm6

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm10,ymm10,ymm6
	vpaddd	ymm10,ymm10,ymm7
	vmovdqu	ymm6,YMMWORD[((224-128))+rax]
	vpaddd	ymm5,ymm5,YMMWORD[((480-256-128))+rbx]

	vpsrld	ymm7,ymm6,3
	vpsrld	ymm1,ymm6,7
	vpslld	ymm2,ymm6,25
	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm6,18
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm6,14
	vmovdqu	ymm0,YMMWORD[((128-128))+rax]
	vpsrld	ymm3,ymm0,10

	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm0,17
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,15
	vpaddd	ymm5,ymm5,ymm7
	vpxor	ymm7,ymm3,ymm1
	vpsrld	ymm1,ymm0,19
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,13
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2
	vpaddd	ymm5,ymm5,ymm7
	vpsrld	ymm7,ymm14,6
	vpslld	ymm2,ymm14,26
	vmovdqu	YMMWORD[(192-128)+rax],ymm5
	vpaddd	ymm5,ymm5,ymm9

	vpsrld	ymm1,ymm14,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm14,21
	vpaddd	ymm5,ymm5,YMMWORD[64+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm14,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm14,7
	vpandn	ymm0,ymm14,ymm8
	vpand	ymm3,ymm14,ymm15

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm9,ymm10,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm10,30
	vpxor	ymm0,ymm0,ymm3
	vpxor	ymm3,ymm11,ymm10

	vpxor	ymm9,ymm9,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm10,13

	vpslld	ymm2,ymm10,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm4,ymm4,ymm3

	vpxor	ymm7,ymm9,ymm1

	vpsrld	ymm1,ymm10,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm10,10
	vpxor	ymm9,ymm11,ymm4
	vpaddd	ymm13,ymm13,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm9,ymm9,ymm5
	vpaddd	ymm9,ymm9,ymm7
	vmovdqu	ymm5,YMMWORD[((256-256-128))+rbx]
	vpaddd	ymm6,ymm6,YMMWORD[((0-128))+rax]

	vpsrld	ymm7,ymm5,3
	vpsrld	ymm1,ymm5,7
	vpslld	ymm2,ymm5,25
	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm5,18
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm5,14
	vmovdqu	ymm0,YMMWORD[((160-128))+rax]
	vpsrld	ymm4,ymm0,10

	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm0,17
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,15
	vpaddd	ymm6,ymm6,ymm7
	vpxor	ymm7,ymm4,ymm1
	vpsrld	ymm1,ymm0,19
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,13
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2
	vpaddd	ymm6,ymm6,ymm7
	vpsrld	ymm7,ymm13,6
	vpslld	ymm2,ymm13,26
	vmovdqu	YMMWORD[(224-128)+rax],ymm6
	vpaddd	ymm6,ymm6,ymm8

	vpsrld	ymm1,ymm13,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm13,21
	vpaddd	ymm6,ymm6,YMMWORD[96+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm13,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm13,7
	vpandn	ymm0,ymm13,ymm15
	vpand	ymm4,ymm13,ymm14

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm8,ymm9,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm9,30
	vpxor	ymm0,ymm0,ymm4
	vpxor	ymm4,ymm10,ymm9

	vpxor	ymm8,ymm8,ymm1
	vpaddd	ymm6,ymm6,ymm7

	vpsrld	ymm1,ymm9,13

	vpslld	ymm2,ymm9,19
	vpaddd	ymm6,ymm6,ymm0
	vpand	ymm3,ymm3,ymm4

	vpxor	ymm7,ymm8,ymm1

	vpsrld	ymm1,ymm9,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm9,10
	vpxor	ymm8,ymm10,ymm3
	vpaddd	ymm12,ymm12,ymm6

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm8,ymm8,ymm6
	vpaddd	ymm8,ymm8,ymm7
	add	rbp,256
	vmovdqu	ymm6,YMMWORD[((288-256-128))+rbx]
	vpaddd	ymm5,ymm5,YMMWORD[((32-128))+rax]

	vpsrld	ymm7,ymm6,3
	vpsrld	ymm1,ymm6,7
	vpslld	ymm2,ymm6,25
	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm6,18
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm6,14
	vmovdqu	ymm0,YMMWORD[((192-128))+rax]
	vpsrld	ymm3,ymm0,10

	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm0,17
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,15
	vpaddd	ymm5,ymm5,ymm7
	vpxor	ymm7,ymm3,ymm1
	vpsrld	ymm1,ymm0,19
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,13
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2
	vpaddd	ymm5,ymm5,ymm7
	vpsrld	ymm7,ymm12,6
	vpslld	ymm2,ymm12,26
	vmovdqu	YMMWORD[(256-256-128)+rbx],ymm5
	vpaddd	ymm5,ymm5,ymm15

	vpsrld	ymm1,ymm12,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm12,21
	vpaddd	ymm5,ymm5,YMMWORD[((-128))+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm12,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm12,7
	vpandn	ymm0,ymm12,ymm14
	vpand	ymm3,ymm12,ymm13

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm15,ymm8,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm8,30
	vpxor	ymm0,ymm0,ymm3
	vpxor	ymm3,ymm9,ymm8

	vpxor	ymm15,ymm15,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm8,13

	vpslld	ymm2,ymm8,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm4,ymm4,ymm3

	vpxor	ymm7,ymm15,ymm1

	vpsrld	ymm1,ymm8,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm8,10
	vpxor	ymm15,ymm9,ymm4
	vpaddd	ymm11,ymm11,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm15,ymm15,ymm5
	vpaddd	ymm15,ymm15,ymm7
	vmovdqu	ymm5,YMMWORD[((320-256-128))+rbx]
	vpaddd	ymm6,ymm6,YMMWORD[((64-128))+rax]

	vpsrld	ymm7,ymm5,3
	vpsrld	ymm1,ymm5,7
	vpslld	ymm2,ymm5,25
	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm5,18
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm5,14
	vmovdqu	ymm0,YMMWORD[((224-128))+rax]
	vpsrld	ymm4,ymm0,10

	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm0,17
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,15
	vpaddd	ymm6,ymm6,ymm7
	vpxor	ymm7,ymm4,ymm1
	vpsrld	ymm1,ymm0,19
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,13
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2
	vpaddd	ymm6,ymm6,ymm7
	vpsrld	ymm7,ymm11,6
	vpslld	ymm2,ymm11,26
	vmovdqu	YMMWORD[(288-256-128)+rbx],ymm6
	vpaddd	ymm6,ymm6,ymm14

	vpsrld	ymm1,ymm11,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm11,21
	vpaddd	ymm6,ymm6,YMMWORD[((-96))+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm11,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm11,7
	vpandn	ymm0,ymm11,ymm13
	vpand	ymm4,ymm11,ymm12

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm14,ymm15,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm15,30
	vpxor	ymm0,ymm0,ymm4
	vpxor	ymm4,ymm8,ymm15

	vpxor	ymm14,ymm14,ymm1
	vpaddd	ymm6,ymm6,ymm7

	vpsrld	ymm1,ymm15,13

	vpslld	ymm2,ymm15,19
	vpaddd	ymm6,ymm6,ymm0
	vpand	ymm3,ymm3,ymm4

	vpxor	ymm7,ymm14,ymm1

	vpsrld	ymm1,ymm15,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm15,10
	vpxor	ymm14,ymm8,ymm3
	vpaddd	ymm10,ymm10,ymm6

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm14,ymm14,ymm6
	vpaddd	ymm14,ymm14,ymm7
	vmovdqu	ymm6,YMMWORD[((352-256-128))+rbx]
	vpaddd	ymm5,ymm5,YMMWORD[((96-128))+rax]

	vpsrld	ymm7,ymm6,3
	vpsrld	ymm1,ymm6,7
	vpslld	ymm2,ymm6,25
	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm6,18
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm6,14
	vmovdqu	ymm0,YMMWORD[((256-256-128))+rbx]
	vpsrld	ymm3,ymm0,10

	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm0,17
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,15
	vpaddd	ymm5,ymm5,ymm7
	vpxor	ymm7,ymm3,ymm1
	vpsrld	ymm1,ymm0,19
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,13
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2
	vpaddd	ymm5,ymm5,ymm7
	vpsrld	ymm7,ymm10,6
	vpslld	ymm2,ymm10,26
	vmovdqu	YMMWORD[(320-256-128)+rbx],ymm5
	vpaddd	ymm5,ymm5,ymm13

	vpsrld	ymm1,ymm10,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm10,21
	vpaddd	ymm5,ymm5,YMMWORD[((-64))+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm10,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm10,7
	vpandn	ymm0,ymm10,ymm12
	vpand	ymm3,ymm10,ymm11

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm13,ymm14,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm14,30
	vpxor	ymm0,ymm0,ymm3
	vpxor	ymm3,ymm15,ymm14

	vpxor	ymm13,ymm13,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm14,13

	vpslld	ymm2,ymm14,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm4,ymm4,ymm3

	vpxor	ymm7,ymm13,ymm1

	vpsrld	ymm1,ymm14,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm14,10
	vpxor	ymm13,ymm15,ymm4
	vpaddd	ymm9,ymm9,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm13,ymm13,ymm5
	vpaddd	ymm13,ymm13,ymm7
	vmovdqu	ymm5,YMMWORD[((384-256-128))+rbx]
	vpaddd	ymm6,ymm6,YMMWORD[((128-128))+rax]

	vpsrld	ymm7,ymm5,3
	vpsrld	ymm1,ymm5,7
	vpslld	ymm2,ymm5,25
	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm5,18
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm5,14
	vmovdqu	ymm0,YMMWORD[((288-256-128))+rbx]
	vpsrld	ymm4,ymm0,10

	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm0,17
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,15
	vpaddd	ymm6,ymm6,ymm7
	vpxor	ymm7,ymm4,ymm1
	vpsrld	ymm1,ymm0,19
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,13
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2
	vpaddd	ymm6,ymm6,ymm7
	vpsrld	ymm7,ymm9,6
	vpslld	ymm2,ymm9,26
	vmovdqu	YMMWORD[(352-256-128)+rbx],ymm6
	vpaddd	ymm6,ymm6,ymm12

	vpsrld	ymm1,ymm9,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm9,21
	vpaddd	ymm6,ymm6,YMMWORD[((-32))+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm9,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm9,7
	vpandn	ymm0,ymm9,ymm11
	vpand	ymm4,ymm9,ymm10

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm12,ymm13,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm13,30
	vpxor	ymm0,ymm0,ymm4
	vpxor	ymm4,ymm14,ymm13

	vpxor	ymm12,ymm12,ymm1
	vpaddd	ymm6,ymm6,ymm7

	vpsrld	ymm1,ymm13,13

	vpslld	ymm2,ymm13,19
	vpaddd	ymm6,ymm6,ymm0
	vpand	ymm3,ymm3,ymm4

	vpxor	ymm7,ymm12,ymm1

	vpsrld	ymm1,ymm13,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm13,10
	vpxor	ymm12,ymm14,ymm3
	vpaddd	ymm8,ymm8,ymm6

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm12,ymm12,ymm6
	vpaddd	ymm12,ymm12,ymm7
	vmovdqu	ymm6,YMMWORD[((416-256-128))+rbx]
	vpaddd	ymm5,ymm5,YMMWORD[((160-128))+rax]

	vpsrld	ymm7,ymm6,3
	vpsrld	ymm1,ymm6,7
	vpslld	ymm2,ymm6,25
	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm6,18
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm6,14
	vmovdqu	ymm0,YMMWORD[((320-256-128))+rbx]
	vpsrld	ymm3,ymm0,10

	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm0,17
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,15
	vpaddd	ymm5,ymm5,ymm7
	vpxor	ymm7,ymm3,ymm1
	vpsrld	ymm1,ymm0,19
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,13
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2
	vpaddd	ymm5,ymm5,ymm7
	vpsrld	ymm7,ymm8,6
	vpslld	ymm2,ymm8,26
	vmovdqu	YMMWORD[(384-256-128)+rbx],ymm5
	vpaddd	ymm5,ymm5,ymm11

	vpsrld	ymm1,ymm8,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm8,21
	vpaddd	ymm5,ymm5,YMMWORD[rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm8,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm8,7
	vpandn	ymm0,ymm8,ymm10
	vpand	ymm3,ymm8,ymm9

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm11,ymm12,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm12,30
	vpxor	ymm0,ymm0,ymm3
	vpxor	ymm3,ymm13,ymm12

	vpxor	ymm11,ymm11,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm12,13

	vpslld	ymm2,ymm12,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm4,ymm4,ymm3

	vpxor	ymm7,ymm11,ymm1

	vpsrld	ymm1,ymm12,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm12,10
	vpxor	ymm11,ymm13,ymm4
	vpaddd	ymm15,ymm15,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm11,ymm11,ymm5
	vpaddd	ymm11,ymm11,ymm7
	vmovdqu	ymm5,YMMWORD[((448-256-128))+rbx]
	vpaddd	ymm6,ymm6,YMMWORD[((192-128))+rax]

	vpsrld	ymm7,ymm5,3
	vpsrld	ymm1,ymm5,7
	vpslld	ymm2,ymm5,25
	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm5,18
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm5,14
	vmovdqu	ymm0,YMMWORD[((352-256-128))+rbx]
	vpsrld	ymm4,ymm0,10

	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm0,17
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,15
	vpaddd	ymm6,ymm6,ymm7
	vpxor	ymm7,ymm4,ymm1
	vpsrld	ymm1,ymm0,19
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,13
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2
	vpaddd	ymm6,ymm6,ymm7
	vpsrld	ymm7,ymm15,6
	vpslld	ymm2,ymm15,26
	vmovdqu	YMMWORD[(416-256-128)+rbx],ymm6
	vpaddd	ymm6,ymm6,ymm10

	vpsrld	ymm1,ymm15,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm15,21
	vpaddd	ymm6,ymm6,YMMWORD[32+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm15,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm15,7
	vpandn	ymm0,ymm15,ymm9
	vpand	ymm4,ymm15,ymm8

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm10,ymm11,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm11,30
	vpxor	ymm0,ymm0,ymm4
	vpxor	ymm4,ymm12,ymm11

	vpxor	ymm10,ymm10,ymm1
	vpaddd	ymm6,ymm6,ymm7

	vpsrld	ymm1,ymm11,13

	vpslld	ymm2,ymm11,19
	vpaddd	ymm6,ymm6,ymm0
	vpand	ymm3,ymm3,ymm4

	vpxor	ymm7,ymm10,ymm1

	vpsrld	ymm1,ymm11,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm11,10
	vpxor	ymm10,ymm12,ymm3
	vpaddd	ymm14,ymm14,ymm6

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm10,ymm10,ymm6
	vpaddd	ymm10,ymm10,ymm7
	vmovdqu	ymm6,YMMWORD[((480-256-128))+rbx]
	vpaddd	ymm5,ymm5,YMMWORD[((224-128))+rax]

	vpsrld	ymm7,ymm6,3
	vpsrld	ymm1,ymm6,7
	vpslld	ymm2,ymm6,25
	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm6,18
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm6,14
	vmovdqu	ymm0,YMMWORD[((384-256-128))+rbx]
	vpsrld	ymm3,ymm0,10

	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm0,17
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,15
	vpaddd	ymm5,ymm5,ymm7
	vpxor	ymm7,ymm3,ymm1
	vpsrld	ymm1,ymm0,19
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,13
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2
	vpaddd	ymm5,ymm5,ymm7
	vpsrld	ymm7,ymm14,6
	vpslld	ymm2,ymm14,26
	vmovdqu	YMMWORD[(448-256-128)+rbx],ymm5
	vpaddd	ymm5,ymm5,ymm9

	vpsrld	ymm1,ymm14,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm14,21
	vpaddd	ymm5,ymm5,YMMWORD[64+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm14,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm14,7
	vpandn	ymm0,ymm14,ymm8
	vpand	ymm3,ymm14,ymm15

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm9,ymm10,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm10,30
	vpxor	ymm0,ymm0,ymm3
	vpxor	ymm3,ymm11,ymm10

	vpxor	ymm9,ymm9,ymm1
	vpaddd	ymm5,ymm5,ymm7

	vpsrld	ymm1,ymm10,13

	vpslld	ymm2,ymm10,19
	vpaddd	ymm5,ymm5,ymm0
	vpand	ymm4,ymm4,ymm3

	vpxor	ymm7,ymm9,ymm1

	vpsrld	ymm1,ymm10,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm10,10
	vpxor	ymm9,ymm11,ymm4
	vpaddd	ymm13,ymm13,ymm5

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm9,ymm9,ymm5
	vpaddd	ymm9,ymm9,ymm7
	vmovdqu	ymm5,YMMWORD[((0-128))+rax]
	vpaddd	ymm6,ymm6,YMMWORD[((256-256-128))+rbx]

	vpsrld	ymm7,ymm5,3
	vpsrld	ymm1,ymm5,7
	vpslld	ymm2,ymm5,25
	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm5,18
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm5,14
	vmovdqu	ymm0,YMMWORD[((416-256-128))+rbx]
	vpsrld	ymm4,ymm0,10

	vpxor	ymm7,ymm7,ymm1
	vpsrld	ymm1,ymm0,17
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,15
	vpaddd	ymm6,ymm6,ymm7
	vpxor	ymm7,ymm4,ymm1
	vpsrld	ymm1,ymm0,19
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm0,13
	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2
	vpaddd	ymm6,ymm6,ymm7
	vpsrld	ymm7,ymm13,6
	vpslld	ymm2,ymm13,26
	vmovdqu	YMMWORD[(480-256-128)+rbx],ymm6
	vpaddd	ymm6,ymm6,ymm8

	vpsrld	ymm1,ymm13,11
	vpxor	ymm7,ymm7,ymm2
	vpslld	ymm2,ymm13,21
	vpaddd	ymm6,ymm6,YMMWORD[96+rbp]
	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm1,ymm13,25
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm13,7
	vpandn	ymm0,ymm13,ymm15
	vpand	ymm4,ymm13,ymm14

	vpxor	ymm7,ymm7,ymm1

	vpsrld	ymm8,ymm9,2
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm1,ymm9,30
	vpxor	ymm0,ymm0,ymm4
	vpxor	ymm4,ymm10,ymm9

	vpxor	ymm8,ymm8,ymm1
	vpaddd	ymm6,ymm6,ymm7

	vpsrld	ymm1,ymm9,13

	vpslld	ymm2,ymm9,19
	vpaddd	ymm6,ymm6,ymm0
	vpand	ymm3,ymm3,ymm4

	vpxor	ymm7,ymm8,ymm1

	vpsrld	ymm1,ymm9,22
	vpxor	ymm7,ymm7,ymm2

	vpslld	ymm2,ymm9,10
	vpxor	ymm8,ymm10,ymm3
	vpaddd	ymm12,ymm12,ymm6

	vpxor	ymm7,ymm7,ymm1
	vpxor	ymm7,ymm7,ymm2

	vpaddd	ymm8,ymm8,ymm6
	vpaddd	ymm8,ymm8,ymm7
	add	rbp,256
	dec	ecx
	jnz	NEAR $L$oop_16_xx_avx2

	mov	ecx,1
	lea	rbx,[512+rsp]
	lea	rbp,[((K256+128))]
	cmp	ecx,DWORD[rbx]
	cmovge	r12,rbp
	cmp	ecx,DWORD[4+rbx]
	cmovge	r13,rbp
	cmp	ecx,DWORD[8+rbx]
	cmovge	r14,rbp
	cmp	ecx,DWORD[12+rbx]
	cmovge	r15,rbp
	cmp	ecx,DWORD[16+rbx]
	cmovge	r8,rbp
	cmp	ecx,DWORD[20+rbx]
	cmovge	r9,rbp
	cmp	ecx,DWORD[24+rbx]
	cmovge	r10,rbp
	cmp	ecx,DWORD[28+rbx]
	cmovge	r11,rbp
	vmovdqa	ymm7,YMMWORD[rbx]
	vpxor	ymm0,ymm0,ymm0
	vmovdqa	ymm6,ymm7
	vpcmpgtd	ymm6,ymm6,ymm0
	vpaddd	ymm7,ymm7,ymm6

	vmovdqu	ymm0,YMMWORD[((0-128))+rdi]
	vpand	ymm8,ymm8,ymm6
	vmovdqu	ymm1,YMMWORD[((32-128))+rdi]
	vpand	ymm9,ymm9,ymm6
	vmovdqu	ymm2,YMMWORD[((64-128))+rdi]
	vpand	ymm10,ymm10,ymm6
	vmovdqu	ymm5,YMMWORD[((96-128))+rdi]
	vpand	ymm11,ymm11,ymm6
	vpaddd	ymm8,ymm8,ymm0
	vmovdqu	ymm0,YMMWORD[((128-128))+rdi]
	vpand	ymm12,ymm12,ymm6
	vpaddd	ymm9,ymm9,ymm1
	vmovdqu	ymm1,YMMWORD[((160-128))+rdi]
	vpand	ymm13,ymm13,ymm6
	vpaddd	ymm10,ymm10,ymm2
	vmovdqu	ymm2,YMMWORD[((192-128))+rdi]
	vpand	ymm14,ymm14,ymm6
	vpaddd	ymm11,ymm11,ymm5
	vmovdqu	ymm5,YMMWORD[((224-128))+rdi]
	vpand	ymm15,ymm15,ymm6
	vpaddd	ymm12,ymm12,ymm0
	vpaddd	ymm13,ymm13,ymm1
	vmovdqu	YMMWORD[(0-128)+rdi],ymm8
	vpaddd	ymm14,ymm14,ymm2
	vmovdqu	YMMWORD[(32-128)+rdi],ymm9
	vpaddd	ymm15,ymm15,ymm5
	vmovdqu	YMMWORD[(64-128)+rdi],ymm10
	vmovdqu	YMMWORD[(96-128)+rdi],ymm11
	vmovdqu	YMMWORD[(128-128)+rdi],ymm12
	vmovdqu	YMMWORD[(160-128)+rdi],ymm13
	vmovdqu	YMMWORD[(192-128)+rdi],ymm14
	vmovdqu	YMMWORD[(224-128)+rdi],ymm15

	vmovdqu	YMMWORD[rbx],ymm7
	lea	rbx,[((256+128))+rsp]
	vmovdqu	ymm6,YMMWORD[$L$pbswap]
	dec	edx
	jnz	NEAR $L$oop_avx2







$L$done_avx2:
	mov	rax,QWORD[544+rsp]

	vzeroupper
	movaps	xmm6,XMMWORD[((-216))+rax]
	movaps	xmm7,XMMWORD[((-200))+rax]
	movaps	xmm8,XMMWORD[((-184))+rax]
	movaps	xmm9,XMMWORD[((-168))+rax]
	movaps	xmm10,XMMWORD[((-152))+rax]
	movaps	xmm11,XMMWORD[((-136))+rax]
	movaps	xmm12,XMMWORD[((-120))+rax]
	movaps	xmm13,XMMWORD[((-104))+rax]
	movaps	xmm14,XMMWORD[((-88))+rax]
	movaps	xmm15,XMMWORD[((-72))+rax]
	mov	r15,QWORD[((-48))+rax]

	mov	r14,QWORD[((-40))+rax]

	mov	r13,QWORD[((-32))+rax]

	mov	r12,QWORD[((-24))+rax]

	mov	rbp,QWORD[((-16))+rax]

	mov	rbx,QWORD[((-8))+rax]

	lea	rsp,[rax]

$L$epilogue_avx2:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_sha256_multi_block_avx2:
ALIGN	256
; K256: SHA-256 round-constant table for the multi-lane code paths.
; There are 64 constants; each one is written eight times in a row (two DD
; lines of four dwords), so every constant occupies one 32-byte row and the
; whole table is 64*32 = 2048 bytes.  The values are the same constants as
; K256_shaext further down, written in decimal (first row 1116352408 =
; 0x428a2f98, last row 3329325298 = 0xc67178f2).
; The AVX2 round code addresses it as YMMWORD[disp+rbp] with rbp starting at
; K256+128 and stepping by 256 bytes, i.e. one full 32-byte row is added to
; the eight lanes per round.
K256:
	DD	1116352408,1116352408,1116352408,1116352408
	DD	1116352408,1116352408,1116352408,1116352408
	DD	1899447441,1899447441,1899447441,1899447441
	DD	1899447441,1899447441,1899447441,1899447441
	DD	3049323471,3049323471,3049323471,3049323471
	DD	3049323471,3049323471,3049323471,3049323471
	DD	3921009573,3921009573,3921009573,3921009573
	DD	3921009573,3921009573,3921009573,3921009573
	DD	961987163,961987163,961987163,961987163
	DD	961987163,961987163,961987163,961987163
	DD	1508970993,1508970993,1508970993,1508970993
	DD	1508970993,1508970993,1508970993,1508970993
	DD	2453635748,2453635748,2453635748,2453635748
	DD	2453635748,2453635748,2453635748,2453635748
	DD	2870763221,2870763221,2870763221,2870763221
	DD	2870763221,2870763221,2870763221,2870763221
	DD	3624381080,3624381080,3624381080,3624381080
	DD	3624381080,3624381080,3624381080,3624381080
	DD	310598401,310598401,310598401,310598401
	DD	310598401,310598401,310598401,310598401
	DD	607225278,607225278,607225278,607225278
	DD	607225278,607225278,607225278,607225278
	DD	1426881987,1426881987,1426881987,1426881987
	DD	1426881987,1426881987,1426881987,1426881987
	DD	1925078388,1925078388,1925078388,1925078388
	DD	1925078388,1925078388,1925078388,1925078388
	DD	2162078206,2162078206,2162078206,2162078206
	DD	2162078206,2162078206,2162078206,2162078206
	DD	2614888103,2614888103,2614888103,2614888103
	DD	2614888103,2614888103,2614888103,2614888103
	DD	3248222580,3248222580,3248222580,3248222580
	DD	3248222580,3248222580,3248222580,3248222580
	DD	3835390401,3835390401,3835390401,3835390401
	DD	3835390401,3835390401,3835390401,3835390401
	DD	4022224774,4022224774,4022224774,4022224774
	DD	4022224774,4022224774,4022224774,4022224774
	DD	264347078,264347078,264347078,264347078
	DD	264347078,264347078,264347078,264347078
	DD	604807628,604807628,604807628,604807628
	DD	604807628,604807628,604807628,604807628
	DD	770255983,770255983,770255983,770255983
	DD	770255983,770255983,770255983,770255983
	DD	1249150122,1249150122,1249150122,1249150122
	DD	1249150122,1249150122,1249150122,1249150122
	DD	1555081692,1555081692,1555081692,1555081692
	DD	1555081692,1555081692,1555081692,1555081692
	DD	1996064986,1996064986,1996064986,1996064986
	DD	1996064986,1996064986,1996064986,1996064986
	DD	2554220882,2554220882,2554220882,2554220882
	DD	2554220882,2554220882,2554220882,2554220882
	DD	2821834349,2821834349,2821834349,2821834349
	DD	2821834349,2821834349,2821834349,2821834349
	DD	2952996808,2952996808,2952996808,2952996808
	DD	2952996808,2952996808,2952996808,2952996808
	DD	3210313671,3210313671,3210313671,3210313671
	DD	3210313671,3210313671,3210313671,3210313671
	DD	3336571891,3336571891,3336571891,3336571891
	DD	3336571891,3336571891,3336571891,3336571891
	DD	3584528711,3584528711,3584528711,3584528711
	DD	3584528711,3584528711,3584528711,3584528711
	DD	113926993,113926993,113926993,113926993
	DD	113926993,113926993,113926993,113926993
	DD	338241895,338241895,338241895,338241895
	DD	338241895,338241895,338241895,338241895
	DD	666307205,666307205,666307205,666307205
	DD	666307205,666307205,666307205,666307205
	DD	773529912,773529912,773529912,773529912
	DD	773529912,773529912,773529912,773529912
	DD	1294757372,1294757372,1294757372,1294757372
	DD	1294757372,1294757372,1294757372,1294757372
	DD	1396182291,1396182291,1396182291,1396182291
	DD	1396182291,1396182291,1396182291,1396182291
	DD	1695183700,1695183700,1695183700,1695183700
	DD	1695183700,1695183700,1695183700,1695183700
	DD	1986661051,1986661051,1986661051,1986661051
	DD	1986661051,1986661051,1986661051,1986661051
	DD	2177026350,2177026350,2177026350,2177026350
	DD	2177026350,2177026350,2177026350,2177026350
	DD	2456956037,2456956037,2456956037,2456956037
	DD	2456956037,2456956037,2456956037,2456956037
	DD	2730485921,2730485921,2730485921,2730485921
	DD	2730485921,2730485921,2730485921,2730485921
	DD	2820302411,2820302411,2820302411,2820302411
	DD	2820302411,2820302411,2820302411,2820302411
	DD	3259730800,3259730800,3259730800,3259730800
	DD	3259730800,3259730800,3259730800,3259730800
	DD	3345764771,3345764771,3345764771,3345764771
	DD	3345764771,3345764771,3345764771,3345764771
	DD	3516065817,3516065817,3516065817,3516065817
	DD	3516065817,3516065817,3516065817,3516065817
	DD	3600352804,3600352804,3600352804,3600352804
	DD	3600352804,3600352804,3600352804,3600352804
	DD	4094571909,4094571909,4094571909,4094571909
	DD	4094571909,4094571909,4094571909,4094571909
	DD	275423344,275423344,275423344,275423344
	DD	275423344,275423344,275423344,275423344
	DD	430227734,430227734,430227734,430227734
	DD	430227734,430227734,430227734,430227734
	DD	506948616,506948616,506948616,506948616
	DD	506948616,506948616,506948616,506948616
	DD	659060556,659060556,659060556,659060556
	DD	659060556,659060556,659060556,659060556
	DD	883997877,883997877,883997877,883997877
	DD	883997877,883997877,883997877,883997877
	DD	958139571,958139571,958139571,958139571
	DD	958139571,958139571,958139571,958139571
	DD	1322822218,1322822218,1322822218,1322822218
	DD	1322822218,1322822218,1322822218,1322822218
	DD	1537002063,1537002063,1537002063,1537002063
	DD	1537002063,1537002063,1537002063,1537002063
	DD	1747873779,1747873779,1747873779,1747873779
	DD	1747873779,1747873779,1747873779,1747873779
	DD	1955562222,1955562222,1955562222,1955562222
	DD	1955562222,1955562222,1955562222,1955562222
	DD	2024104815,2024104815,2024104815,2024104815
	DD	2024104815,2024104815,2024104815,2024104815
	DD	2227730452,2227730452,2227730452,2227730452
	DD	2227730452,2227730452,2227730452,2227730452
	DD	2361852424,2361852424,2361852424,2361852424
	DD	2361852424,2361852424,2361852424,2361852424
	DD	2428436474,2428436474,2428436474,2428436474
	DD	2428436474,2428436474,2428436474,2428436474
	DD	2756734187,2756734187,2756734187,2756734187
	DD	2756734187,2756734187,2756734187,2756734187
	DD	3204031479,3204031479,3204031479,3204031479
	DD	3204031479,3204031479,3204031479,3204031479
	DD	3329325298,3329325298,3329325298,3329325298
	DD	3329325298,3329325298,3329325298,3329325298
; $L$pbswap: two identical 16-byte rows, so a 32-byte load sees the same
; pattern in both halves (the AVX2 path loads it into ymm6 with vmovdqu).
; Stored little-endian, each dword lists the byte indices of one dword in
; reverse order (3,2,1,0 / 7,6,5,4 / ...).
; NOTE(review): presumably used as a byte-shuffle mask that byte-swaps every
; 32-bit word of the input; the shuffle itself is outside this excerpt.
$L$pbswap:
	DD	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
	DD	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
; K256_shaext: the 64 SHA-256 round constants, one dword each (not
; replicated per lane), four per line.
; NOTE(review): presumably consumed by the SHA-extension code path; its use
; sites are outside this excerpt.
K256_shaext:
	DD	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	DD	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	DD	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	DD	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	DD	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	DD	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	DD	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	DD	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	DD	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	DD	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	DD	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	DD	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	DD	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	DD	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	DD	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	DD	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
; NUL-terminated ASCII identification string embedded in the object:
; "SHA256 multi-block transform for x86_64, CRYPTOGAMS by " followed by the
; author's e-mail address in angle brackets.
DB	83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111
DB	99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114
DB	32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
DB	65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112
DB	101,110,115,115,108,46,111,114,103,62,0
EXTERN	__imp_RtlVirtualUnwind

ALIGN	16
;-----------------------------------------------------------------------
; se_handler: Win64 language-specific exception handler registered in
; .xdata for the sha256_multi_block, _shaext and _avx unwind entries.
;
; In (Win64 handler convention):
;   r8 = CONTEXT* of the interrupted code
;   r9 = DISPATCHER_CONTEXT*
; HandlerData (two RVAs from .xdata): [0] = body label, [1] = epilogue label.
;
; If Rip lies between those two labels the function's frame is live, so the
; handler recovers the entry stack pointer stored in the frame and restores
; rbx, rbp and xmm6-xmm15 into the CONTEXT.  In every case it then restores
; rsi/rdi from the caller's home slots, copies the CONTEXT to
; disp->ContextRecord, calls RtlVirtualUnwind and returns 1.
; The numeric offsets are fields of the Win64 CONTEXT / DISPATCHER_CONTEXT
; structures (winnt.h).
;-----------------------------------------------------------------------
se_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq				; flags are saved because the copies below execute cld
	sub	rsp,64			; 32 bytes shadow space + 4 stack arguments for the call

	mov	rax,QWORD[120+r8]	; context->Rax
	mov	rbx,QWORD[248+r8]	; context->Rip

	mov	rsi,QWORD[8+r9]		; disp->ImageBase
	mov	r11,QWORD[56+r9]	; disp->HandlerData

	mov	r10d,DWORD[r11]		; HandlerData[0]: RVA of the body label
	lea	r10,[r10*1+rsi]		; -> absolute address
	cmp	rbx,r10
	jb	NEAR $L$in_prologue	; Rip before body: use context->Rax as the frame base

	mov	rax,QWORD[152+r8]	; context->Rsp

	mov	r10d,DWORD[4+r11]	; HandlerData[1]: RVA of the epilogue label
	lea	r10,[r10*1+rsi]		; -> absolute address
	cmp	rbx,r10
	jae	NEAR $L$in_prologue	; Rip at/after epilogue: use context->Rsp as the frame base

	mov	rax,QWORD[272+rax]	; entry stack pointer saved in the frame at [rsp+272]

	mov	rbx,QWORD[((-8))+rax]	; saved rbx
	mov	rbp,QWORD[((-16))+rax]	; saved rbp
	mov	QWORD[144+r8],rbx	; context->Rbx
	mov	QWORD[160+r8],rbp	; context->Rbp

	lea	rsi,[((-24-160))+rax]	; source: 160-byte xmm6..xmm15 save area
	lea	rdi,[512+r8]		; destination: context->Xmm6
	mov	ecx,20			; 20 qwords = 10 XMM registers
	DD	0xa548f3fc		; bytes FC F3 48 A5 = cld; rep movsq

; Common tail (also entered from avx2_handler).  Here rax = stack pointer
; value at function entry.
$L$in_prologue:
	mov	rdi,QWORD[8+rax]	; rdi/rsi were stored to [8+rsp]/[16+rsp] by the WIN64 prologue
	mov	rsi,QWORD[16+rax]
	mov	QWORD[152+r8],rax	; context->Rsp
	mov	QWORD[168+r8],rsi	; context->Rsi
	mov	QWORD[176+r8],rdi	; context->Rdi

	mov	rdi,QWORD[40+r9]	; destination: disp->ContextRecord
	mov	rsi,r8			; source: the CONTEXT just updated
	mov	ecx,154			; 154 qwords = 1232 bytes, sizeof(CONTEXT)
	DD	0xa548f3fc		; cld; rep movsq

	mov	rsi,r9			; rsi = disp (r9 is reused as an argument below)
	xor	rcx,rcx			; arg1: HandlerType = 0 (UNW_FLAG_NHANDLER)
	mov	rdx,QWORD[8+rsi]	; arg2: disp->ImageBase
	mov	r8,QWORD[rsi]		; arg3: disp->ControlPc
	mov	r9,QWORD[16+rsi]	; arg4: disp->FunctionEntry
	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
	lea	r11,[56+rsi]		; &disp->HandlerData
	lea	r12,[24+rsi]		; &disp->EstablisherFrame
	mov	QWORD[32+rsp],r10	; arg5: ContextRecord
	mov	QWORD[40+rsp],r11	; arg6: HandlerData
	mov	QWORD[48+rsp],r12	; arg7: EstablisherFrame
	mov	QWORD[56+rsp],rcx	; arg8: ContextPointers = NULL
	call	QWORD[__imp_RtlVirtualUnwind]

	mov	eax,1			; return 1 (ExceptionContinueSearch)
	add	rsp,64
	popfq
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp
	pop	rbx
	pop	rdi
	pop	rsi
	DB	0F3h,0C3h		;repret


ALIGN	16
;-----------------------------------------------------------------------
; avx2_handler: Win64 language-specific exception handler registered in
; .xdata for the sha256_multi_block_avx2 unwind entry.
;
; In (Win64 handler convention):
;   r8 = CONTEXT* of the interrupted code
;   r9 = DISPATCHER_CONTEXT*
; HandlerData (two RVAs from .xdata): [0] = $L$body_avx2,
;                                     [1] = $L$epilogue_avx2.
;
; When Rip lies inside the body, the entry stack pointer is recovered from
; the AVX2 frame and rbx, rbp, r12-r15 and xmm6-xmm15 are restored into the
; CONTEXT, mirroring the $L$done_avx2 epilogue.  Control then joins the
; shared tail $L$in_prologue in se_handler, which finishes the unwind and
; returns.
;
; Fix: the saved stack pointer lives in the interrupted function's frame at
; [rsp+544] (the epilogue reloads it with "mov rax,QWORD[544+rsp]"), so it
; must be read relative to context->Rsp (rax), not relative to the CONTEXT
; pointer r8.  Offset 544 from r8 falls inside the CONTEXT's XMM save area.
;-----------------------------------------------------------------------
avx2_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq				; popped again by the shared tail in se_handler
	sub	rsp,64			; shadow space + stack arguments used by the shared tail

	mov	rax,QWORD[120+r8]	; context->Rax
	mov	rbx,QWORD[248+r8]	; context->Rip

	mov	rsi,QWORD[8+r9]		; disp->ImageBase
	mov	r11,QWORD[56+r9]	; disp->HandlerData

	mov	r10d,DWORD[r11]		; HandlerData[0]: RVA of the body label
	lea	r10,[r10*1+rsi]		; -> absolute address
	cmp	rbx,r10
	jb	NEAR $L$in_prologue	; Rip before body: use context->Rax as the frame base

	mov	rax,QWORD[152+r8]	; context->Rsp

	mov	r10d,DWORD[4+r11]	; HandlerData[1]: RVA of the epilogue label
	lea	r10,[r10*1+rsi]		; -> absolute address
	cmp	rbx,r10
	jae	NEAR $L$in_prologue	; Rip at/after epilogue: use context->Rsp as the frame base

	mov	rax,QWORD[544+rax]	; entry stack pointer saved in the frame at [rsp+544]

	mov	rbx,QWORD[((-8))+rax]	; saved callee-saved GPRs, same slots as the epilogue
	mov	rbp,QWORD[((-16))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r15,QWORD[((-48))+rax]
	mov	QWORD[144+r8],rbx	; context->Rbx
	mov	QWORD[160+r8],rbp	; context->Rbp
	mov	QWORD[216+r8],r12	; context->R12
	mov	QWORD[224+r8],r13	; context->R13
	mov	QWORD[232+r8],r14	; context->R14
	mov	QWORD[240+r8],r15	; context->R15

	lea	rsi,[((-56-160))+rax]	; source: xmm6..xmm15 save area (xmm6 at -216)
	lea	rdi,[512+r8]		; destination: context->Xmm6
	mov	ecx,20			; 20 qwords = 10 XMM registers
	DD	0xa548f3fc		; bytes FC F3 48 A5 = cld; rep movsq

	jmp	NEAR $L$in_prologue	; shared tail: fix Rsp/Rsi/Rdi, RtlVirtualUnwind, return

; .pdata: Win64 function table.  One entry of three image-relative
; addresses (begin, end, unwind info) per code range: the base routine and
; its _shaext, _avx and _avx2 variants.  Each third field points at the
; matching $L$SEH_info_* record in .xdata.
section	.pdata rdata align=4
ALIGN	4
	DD	$L$SEH_begin_sha256_multi_block wrt ..imagebase
	DD	$L$SEH_end_sha256_multi_block wrt ..imagebase
	DD	$L$SEH_info_sha256_multi_block wrt ..imagebase
	DD	$L$SEH_begin_sha256_multi_block_shaext wrt ..imagebase
	DD	$L$SEH_end_sha256_multi_block_shaext wrt ..imagebase
	DD	$L$SEH_info_sha256_multi_block_shaext wrt ..imagebase
	DD	$L$SEH_begin_sha256_multi_block_avx wrt ..imagebase
	DD	$L$SEH_end_sha256_multi_block_avx wrt ..imagebase
	DD	$L$SEH_info_sha256_multi_block_avx wrt ..imagebase
	DD	$L$SEH_begin_sha256_multi_block_avx2 wrt ..imagebase
	DD	$L$SEH_end_sha256_multi_block_avx2 wrt ..imagebase
	DD	$L$SEH_info_sha256_multi_block_avx2 wrt ..imagebase
; .xdata: unwind-info records referenced from .pdata.  Every record has the
; same shape:
;   DB 9,0,0,0      header bytes; first byte 9 = version 1 with the
;                   exception-handler flag set, remaining bytes zero
;                   (no prologue size, no unwind codes, no frame register)
;   DD handler      image-relative address of the language-specific handler
;   DD body,epilog  handler data: the two labels the handler reads as
;                   HandlerData[0] and HandlerData[1] to decide whether the
;                   faulting Rip is inside the function body
; The first three records use se_handler; the AVX2 record uses avx2_handler.
section	.xdata rdata align=8
ALIGN	8
$L$SEH_info_sha256_multi_block:
DB	9,0,0,0
	DD	se_handler wrt ..imagebase
	DD	$L$body wrt ..imagebase,$L$epilogue wrt ..imagebase
$L$SEH_info_sha256_multi_block_shaext:
DB	9,0,0,0
	DD	se_handler wrt ..imagebase
	DD	$L$body_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase
$L$SEH_info_sha256_multi_block_avx:
DB	9,0,0,0
	DD	se_handler wrt ..imagebase
	DD	$L$body_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
$L$SEH_info_sha256_multi_block_avx2:
DB	9,0,0,0
	DD	avx2_handler wrt ..imagebase
	DD	$L$body_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase

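; NOTE: a stray line of non-assembly text (web-page residue) followed the .xdata table here; it is not part of the source.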