summaryrefslogtreecommitdiffstats
path: root/win-x86_64
diff options
context:
space:
mode:
authorAdam Langley <agl@google.com>2015-01-22 14:27:53 -0800
committerAdam Langley <agl@google.com>2015-01-30 16:52:14 -0800
commitd9e397b599b13d642138480a28c14db7a136bf05 (patch)
tree34bab61dc4ce323b123ad4614dbc07e86ea2f9ef /win-x86_64
downloadexternal_boringssl-d9e397b599b13d642138480a28c14db7a136bf05.zip
external_boringssl-d9e397b599b13d642138480a28c14db7a136bf05.tar.gz
external_boringssl-d9e397b599b13d642138480a28c14db7a136bf05.tar.bz2
Initial commit of BoringSSL for Android.
Diffstat (limited to 'win-x86_64')
-rw-r--r--win-x86_64/crypto/aes/aes-x86_64.asm2864
-rw-r--r--win-x86_64/crypto/aes/aesni-x86_64.asm3631
-rw-r--r--win-x86_64/crypto/aes/bsaes-x86_64.asm2734
-rw-r--r--win-x86_64/crypto/aes/vpaes-x86_64.asm1143
-rw-r--r--win-x86_64/crypto/bn/modexp512-x86_64.asm1887
-rw-r--r--win-x86_64/crypto/bn/rsaz-avx2.asm29
-rw-r--r--win-x86_64/crypto/bn/rsaz-x86_64.asm1326
-rw-r--r--win-x86_64/crypto/bn/x86_64-mont.asm945
-rw-r--r--win-x86_64/crypto/bn/x86_64-mont5.asm2061
-rw-r--r--win-x86_64/crypto/cpu-x86_64-asm.asm158
-rw-r--r--win-x86_64/crypto/md5/md5-x86_64.asm778
-rw-r--r--win-x86_64/crypto/modes/aesni-gcm-x86_64.asm19
-rw-r--r--win-x86_64/crypto/modes/ghash-x86_64.asm1510
-rw-r--r--win-x86_64/crypto/rc4/rc4-md5-x86_64.asm1374
-rw-r--r--win-x86_64/crypto/rc4/rc4-x86_64.asm773
-rw-r--r--win-x86_64/crypto/sha/sha1-x86_64.asm2619
-rw-r--r--win-x86_64/crypto/sha/sha256-x86_64.asm2997
-rw-r--r--win-x86_64/crypto/sha/sha512-x86_64.asm1913
18 files changed, 28761 insertions, 0 deletions
diff --git a/win-x86_64/crypto/aes/aes-x86_64.asm b/win-x86_64/crypto/aes/aes-x86_64.asm
new file mode 100644
index 0000000..96cbb4b
--- /dev/null
+++ b/win-x86_64/crypto/aes/aes-x86_64.asm
@@ -0,0 +1,2864 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+
+ALIGN 16
+_x86_64_AES_encrypt PROC PRIVATE
+ xor eax,DWORD PTR[r15]
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+
+ mov r13d,DWORD PTR[240+r15]
+ sub r13d,1
+ jmp $L$enc_loop
+ALIGN 16
+$L$enc_loop::
+
+ movzx esi,al
+ movzx edi,bl
+ movzx ebp,cl
+ mov r10d,DWORD PTR[rsi*8+r14]
+ mov r11d,DWORD PTR[rdi*8+r14]
+ mov r12d,DWORD PTR[rbp*8+r14]
+
+ movzx esi,bh
+ movzx edi,ch
+ movzx ebp,dl
+ xor r10d,DWORD PTR[3+rsi*8+r14]
+ xor r11d,DWORD PTR[3+rdi*8+r14]
+ mov r8d,DWORD PTR[rbp*8+r14]
+
+ movzx esi,dh
+ shr ecx,16
+ movzx ebp,ah
+ xor r12d,DWORD PTR[3+rsi*8+r14]
+ shr edx,16
+ xor r8d,DWORD PTR[3+rbp*8+r14]
+
+ shr ebx,16
+ lea r15,QWORD PTR[16+r15]
+ shr eax,16
+
+ movzx esi,cl
+ movzx edi,dl
+ movzx ebp,al
+ xor r10d,DWORD PTR[2+rsi*8+r14]
+ xor r11d,DWORD PTR[2+rdi*8+r14]
+ xor r12d,DWORD PTR[2+rbp*8+r14]
+
+ movzx esi,dh
+ movzx edi,ah
+ movzx ebp,bl
+ xor r10d,DWORD PTR[1+rsi*8+r14]
+ xor r11d,DWORD PTR[1+rdi*8+r14]
+ xor r8d,DWORD PTR[2+rbp*8+r14]
+
+ mov edx,DWORD PTR[12+r15]
+ movzx edi,bh
+ movzx ebp,ch
+ mov eax,DWORD PTR[r15]
+ xor r12d,DWORD PTR[1+rdi*8+r14]
+ xor r8d,DWORD PTR[1+rbp*8+r14]
+
+ mov ebx,DWORD PTR[4+r15]
+ mov ecx,DWORD PTR[8+r15]
+ xor eax,r10d
+ xor ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+ sub r13d,1
+ jnz $L$enc_loop
+ movzx esi,al
+ movzx edi,bl
+ movzx ebp,cl
+ movzx r10d,BYTE PTR[2+rsi*8+r14]
+ movzx r11d,BYTE PTR[2+rdi*8+r14]
+ movzx r12d,BYTE PTR[2+rbp*8+r14]
+
+ movzx esi,dl
+ movzx edi,bh
+ movzx ebp,ch
+ movzx r8d,BYTE PTR[2+rsi*8+r14]
+ mov edi,DWORD PTR[rdi*8+r14]
+ mov ebp,DWORD PTR[rbp*8+r14]
+
+ and edi,00000ff00h
+ and ebp,00000ff00h
+
+ xor r10d,edi
+ xor r11d,ebp
+ shr ecx,16
+
+ movzx esi,dh
+ movzx edi,ah
+ shr edx,16
+ mov esi,DWORD PTR[rsi*8+r14]
+ mov edi,DWORD PTR[rdi*8+r14]
+
+ and esi,00000ff00h
+ and edi,00000ff00h
+ shr ebx,16
+ xor r12d,esi
+ xor r8d,edi
+ shr eax,16
+
+ movzx esi,cl
+ movzx edi,dl
+ movzx ebp,al
+ mov esi,DWORD PTR[rsi*8+r14]
+ mov edi,DWORD PTR[rdi*8+r14]
+ mov ebp,DWORD PTR[rbp*8+r14]
+
+ and esi,000ff0000h
+ and edi,000ff0000h
+ and ebp,000ff0000h
+
+ xor r10d,esi
+ xor r11d,edi
+ xor r12d,ebp
+
+ movzx esi,bl
+ movzx edi,dh
+ movzx ebp,ah
+ mov esi,DWORD PTR[rsi*8+r14]
+ mov edi,DWORD PTR[2+rdi*8+r14]
+ mov ebp,DWORD PTR[2+rbp*8+r14]
+
+ and esi,000ff0000h
+ and edi,0ff000000h
+ and ebp,0ff000000h
+
+ xor r8d,esi
+ xor r10d,edi
+ xor r11d,ebp
+
+ movzx esi,bh
+ movzx edi,ch
+ mov edx,DWORD PTR[((16+12))+r15]
+ mov esi,DWORD PTR[2+rsi*8+r14]
+ mov edi,DWORD PTR[2+rdi*8+r14]
+ mov eax,DWORD PTR[((16+0))+r15]
+
+ and esi,0ff000000h
+ and edi,0ff000000h
+
+ xor r12d,esi
+ xor r8d,edi
+
+ mov ebx,DWORD PTR[((16+4))+r15]
+ mov ecx,DWORD PTR[((16+8))+r15]
+ xor eax,r10d
+ xor ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+DB 0f3h,0c3h
+_x86_64_AES_encrypt ENDP
+
+ALIGN 16
+_x86_64_AES_encrypt_compact PROC PRIVATE
+ lea r8,QWORD PTR[128+r14]
+ mov edi,DWORD PTR[((0-128))+r8]
+ mov ebp,DWORD PTR[((32-128))+r8]
+ mov r10d,DWORD PTR[((64-128))+r8]
+ mov r11d,DWORD PTR[((96-128))+r8]
+ mov edi,DWORD PTR[((128-128))+r8]
+ mov ebp,DWORD PTR[((160-128))+r8]
+ mov r10d,DWORD PTR[((192-128))+r8]
+ mov r11d,DWORD PTR[((224-128))+r8]
+ jmp $L$enc_loop_compact
+ALIGN 16
+$L$enc_loop_compact::
+ xor eax,DWORD PTR[r15]
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+ lea r15,QWORD PTR[16+r15]
+ movzx r10d,al
+ movzx r11d,bl
+ movzx r12d,cl
+ movzx r8d,dl
+ movzx esi,bh
+ movzx edi,ch
+ shr ecx,16
+ movzx ebp,dh
+ movzx r10d,BYTE PTR[r10*1+r14]
+ movzx r11d,BYTE PTR[r11*1+r14]
+ movzx r12d,BYTE PTR[r12*1+r14]
+ movzx r8d,BYTE PTR[r8*1+r14]
+
+ movzx r9d,BYTE PTR[rsi*1+r14]
+ movzx esi,ah
+ movzx r13d,BYTE PTR[rdi*1+r14]
+ movzx edi,cl
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ movzx esi,BYTE PTR[rsi*1+r14]
+
+ shl r9d,8
+ shr edx,16
+ shl r13d,8
+ xor r10d,r9d
+ shr eax,16
+ movzx r9d,dl
+ shr ebx,16
+ xor r11d,r13d
+ shl ebp,8
+ movzx r13d,al
+ movzx edi,BYTE PTR[rdi*1+r14]
+ xor r12d,ebp
+
+ shl esi,8
+ movzx ebp,bl
+ shl edi,16
+ xor r8d,esi
+ movzx r9d,BYTE PTR[r9*1+r14]
+ movzx esi,dh
+ movzx r13d,BYTE PTR[r13*1+r14]
+ xor r10d,edi
+
+ shr ecx,8
+ movzx edi,ah
+ shl r9d,16
+ shr ebx,8
+ shl r13d,16
+ xor r11d,r9d
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ movzx edx,BYTE PTR[rcx*1+r14]
+ movzx ecx,BYTE PTR[rbx*1+r14]
+
+ shl ebp,16
+ xor r12d,r13d
+ shl esi,24
+ xor r8d,ebp
+ shl edi,24
+ xor r10d,esi
+ shl edx,24
+ xor r11d,edi
+ shl ecx,24
+ mov eax,r10d
+ mov ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+ cmp r15,QWORD PTR[16+rsp]
+ je $L$enc_compact_done
+ mov r10d,080808080h
+ mov r11d,080808080h
+ and r10d,eax
+ and r11d,ebx
+ mov esi,r10d
+ mov edi,r11d
+ shr r10d,7
+ lea r8d,DWORD PTR[rax*1+rax]
+ shr r11d,7
+ lea r9d,DWORD PTR[rbx*1+rbx]
+ sub esi,r10d
+ sub edi,r11d
+ and r8d,0fefefefeh
+ and r9d,0fefefefeh
+ and esi,01b1b1b1bh
+ and edi,01b1b1b1bh
+ mov r10d,eax
+ mov r11d,ebx
+ xor r8d,esi
+ xor r9d,edi
+
+ xor eax,r8d
+ xor ebx,r9d
+ mov r12d,080808080h
+ rol eax,24
+ mov ebp,080808080h
+ rol ebx,24
+ and r12d,ecx
+ and ebp,edx
+ xor eax,r8d
+ xor ebx,r9d
+ mov esi,r12d
+ ror r10d,16
+ mov edi,ebp
+ ror r11d,16
+ lea r8d,DWORD PTR[rcx*1+rcx]
+ shr r12d,7
+ xor eax,r10d
+ shr ebp,7
+ xor ebx,r11d
+ ror r10d,8
+ lea r9d,DWORD PTR[rdx*1+rdx]
+ ror r11d,8
+ sub esi,r12d
+ sub edi,ebp
+ xor eax,r10d
+ xor ebx,r11d
+
+ and r8d,0fefefefeh
+ and r9d,0fefefefeh
+ and esi,01b1b1b1bh
+ and edi,01b1b1b1bh
+ mov r12d,ecx
+ mov ebp,edx
+ xor r8d,esi
+ xor r9d,edi
+
+ ror r12d,16
+ xor ecx,r8d
+ ror ebp,16
+ xor edx,r9d
+ rol ecx,24
+ mov esi,DWORD PTR[r14]
+ rol edx,24
+ xor ecx,r8d
+ mov edi,DWORD PTR[64+r14]
+ xor edx,r9d
+ mov r8d,DWORD PTR[128+r14]
+ xor ecx,r12d
+ ror r12d,8
+ xor edx,ebp
+ ror ebp,8
+ xor ecx,r12d
+ mov r9d,DWORD PTR[192+r14]
+ xor edx,ebp
+ jmp $L$enc_loop_compact
+ALIGN 16
+$L$enc_compact_done::
+ xor eax,DWORD PTR[r15]
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+DB 0f3h,0c3h
+_x86_64_AES_encrypt_compact ENDP
+ALIGN 16
+PUBLIC asm_AES_encrypt
+
+
+asm_AES_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_asm_AES_encrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+
+
+ mov r10,rsp
+ lea rcx,QWORD PTR[((-63))+rdx]
+ and rsp,-64
+ sub rcx,rsp
+ neg rcx
+ and rcx,03c0h
+ sub rsp,rcx
+ sub rsp,32
+
+ mov QWORD PTR[16+rsp],rsi
+ mov QWORD PTR[24+rsp],r10
+$L$enc_prologue::
+
+ mov r15,rdx
+ mov r13d,DWORD PTR[240+r15]
+
+ mov eax,DWORD PTR[rdi]
+ mov ebx,DWORD PTR[4+rdi]
+ mov ecx,DWORD PTR[8+rdi]
+ mov edx,DWORD PTR[12+rdi]
+
+ shl r13d,4
+ lea rbp,QWORD PTR[r13*1+r15]
+ mov QWORD PTR[rsp],r15
+ mov QWORD PTR[8+rsp],rbp
+
+
+ lea r14,QWORD PTR[(($L$AES_Te+2048))]
+ lea rbp,QWORD PTR[768+rsp]
+ sub rbp,r14
+ and rbp,0300h
+ lea r14,QWORD PTR[rbp*1+r14]
+
+ call _x86_64_AES_encrypt_compact
+
+ mov r9,QWORD PTR[16+rsp]
+ mov rsi,QWORD PTR[24+rsp]
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$enc_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_asm_AES_encrypt::
+asm_AES_encrypt ENDP
+
+ALIGN 16
+_x86_64_AES_decrypt PROC PRIVATE
+ xor eax,DWORD PTR[r15]
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+
+ mov r13d,DWORD PTR[240+r15]
+ sub r13d,1
+ jmp $L$dec_loop
+ALIGN 16
+$L$dec_loop::
+
+ movzx esi,al
+ movzx edi,bl
+ movzx ebp,cl
+ mov r10d,DWORD PTR[rsi*8+r14]
+ mov r11d,DWORD PTR[rdi*8+r14]
+ mov r12d,DWORD PTR[rbp*8+r14]
+
+ movzx esi,dh
+ movzx edi,ah
+ movzx ebp,dl
+ xor r10d,DWORD PTR[3+rsi*8+r14]
+ xor r11d,DWORD PTR[3+rdi*8+r14]
+ mov r8d,DWORD PTR[rbp*8+r14]
+
+ movzx esi,bh
+ shr eax,16
+ movzx ebp,ch
+ xor r12d,DWORD PTR[3+rsi*8+r14]
+ shr edx,16
+ xor r8d,DWORD PTR[3+rbp*8+r14]
+
+ shr ebx,16
+ lea r15,QWORD PTR[16+r15]
+ shr ecx,16
+
+ movzx esi,cl
+ movzx edi,dl
+ movzx ebp,al
+ xor r10d,DWORD PTR[2+rsi*8+r14]
+ xor r11d,DWORD PTR[2+rdi*8+r14]
+ xor r12d,DWORD PTR[2+rbp*8+r14]
+
+ movzx esi,bh
+ movzx edi,ch
+ movzx ebp,bl
+ xor r10d,DWORD PTR[1+rsi*8+r14]
+ xor r11d,DWORD PTR[1+rdi*8+r14]
+ xor r8d,DWORD PTR[2+rbp*8+r14]
+
+ movzx esi,dh
+ mov edx,DWORD PTR[12+r15]
+ movzx ebp,ah
+ xor r12d,DWORD PTR[1+rsi*8+r14]
+ mov eax,DWORD PTR[r15]
+ xor r8d,DWORD PTR[1+rbp*8+r14]
+
+ xor eax,r10d
+ mov ebx,DWORD PTR[4+r15]
+ mov ecx,DWORD PTR[8+r15]
+ xor ecx,r12d
+ xor ebx,r11d
+ xor edx,r8d
+ sub r13d,1
+ jnz $L$dec_loop
+ lea r14,QWORD PTR[2048+r14]
+ movzx esi,al
+ movzx edi,bl
+ movzx ebp,cl
+ movzx r10d,BYTE PTR[rsi*1+r14]
+ movzx r11d,BYTE PTR[rdi*1+r14]
+ movzx r12d,BYTE PTR[rbp*1+r14]
+
+ movzx esi,dl
+ movzx edi,dh
+ movzx ebp,ah
+ movzx r8d,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ movzx ebp,BYTE PTR[rbp*1+r14]
+
+ shl edi,8
+ shl ebp,8
+
+ xor r10d,edi
+ xor r11d,ebp
+ shr edx,16
+
+ movzx esi,bh
+ movzx edi,ch
+ shr eax,16
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+
+ shl esi,8
+ shl edi,8
+ shr ebx,16
+ xor r12d,esi
+ xor r8d,edi
+ shr ecx,16
+
+ movzx esi,cl
+ movzx edi,dl
+ movzx ebp,al
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ movzx ebp,BYTE PTR[rbp*1+r14]
+
+ shl esi,16
+ shl edi,16
+ shl ebp,16
+
+ xor r10d,esi
+ xor r11d,edi
+ xor r12d,ebp
+
+ movzx esi,bl
+ movzx edi,bh
+ movzx ebp,ch
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ movzx ebp,BYTE PTR[rbp*1+r14]
+
+ shl esi,16
+ shl edi,24
+ shl ebp,24
+
+ xor r8d,esi
+ xor r10d,edi
+ xor r11d,ebp
+
+ movzx esi,dh
+ movzx edi,ah
+ mov edx,DWORD PTR[((16+12))+r15]
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ mov eax,DWORD PTR[((16+0))+r15]
+
+ shl esi,24
+ shl edi,24
+
+ xor r12d,esi
+ xor r8d,edi
+
+ mov ebx,DWORD PTR[((16+4))+r15]
+ mov ecx,DWORD PTR[((16+8))+r15]
+ lea r14,QWORD PTR[((-2048))+r14]
+ xor eax,r10d
+ xor ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+DB 0f3h,0c3h
+_x86_64_AES_decrypt ENDP
+
+ALIGN 16
+_x86_64_AES_decrypt_compact PROC PRIVATE
+ lea r8,QWORD PTR[128+r14]
+ mov edi,DWORD PTR[((0-128))+r8]
+ mov ebp,DWORD PTR[((32-128))+r8]
+ mov r10d,DWORD PTR[((64-128))+r8]
+ mov r11d,DWORD PTR[((96-128))+r8]
+ mov edi,DWORD PTR[((128-128))+r8]
+ mov ebp,DWORD PTR[((160-128))+r8]
+ mov r10d,DWORD PTR[((192-128))+r8]
+ mov r11d,DWORD PTR[((224-128))+r8]
+ jmp $L$dec_loop_compact
+
+ALIGN 16
+$L$dec_loop_compact::
+ xor eax,DWORD PTR[r15]
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+ lea r15,QWORD PTR[16+r15]
+ movzx r10d,al
+ movzx r11d,bl
+ movzx r12d,cl
+ movzx r8d,dl
+ movzx esi,dh
+ movzx edi,ah
+ shr edx,16
+ movzx ebp,bh
+ movzx r10d,BYTE PTR[r10*1+r14]
+ movzx r11d,BYTE PTR[r11*1+r14]
+ movzx r12d,BYTE PTR[r12*1+r14]
+ movzx r8d,BYTE PTR[r8*1+r14]
+
+ movzx r9d,BYTE PTR[rsi*1+r14]
+ movzx esi,ch
+ movzx r13d,BYTE PTR[rdi*1+r14]
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ movzx esi,BYTE PTR[rsi*1+r14]
+
+ shr ecx,16
+ shl r13d,8
+ shl r9d,8
+ movzx edi,cl
+ shr eax,16
+ xor r10d,r9d
+ shr ebx,16
+ movzx r9d,dl
+
+ shl ebp,8
+ xor r11d,r13d
+ shl esi,8
+ movzx r13d,al
+ movzx edi,BYTE PTR[rdi*1+r14]
+ xor r12d,ebp
+ movzx ebp,bl
+
+ shl edi,16
+ xor r8d,esi
+ movzx r9d,BYTE PTR[r9*1+r14]
+ movzx esi,bh
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ xor r10d,edi
+ movzx r13d,BYTE PTR[r13*1+r14]
+ movzx edi,ch
+
+ shl ebp,16
+ shl r9d,16
+ shl r13d,16
+ xor r8d,ebp
+ movzx ebp,dh
+ xor r11d,r9d
+ shr eax,8
+ xor r12d,r13d
+
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx ebx,BYTE PTR[rdi*1+r14]
+ movzx ecx,BYTE PTR[rbp*1+r14]
+ movzx edx,BYTE PTR[rax*1+r14]
+
+ mov eax,r10d
+ shl esi,24
+ shl ebx,24
+ shl ecx,24
+ xor eax,esi
+ shl edx,24
+ xor ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+ cmp r15,QWORD PTR[16+rsp]
+ je $L$dec_compact_done
+
+ mov rsi,QWORD PTR[((256+0))+r14]
+ shl rbx,32
+ shl rdx,32
+ mov rdi,QWORD PTR[((256+8))+r14]
+ or rax,rbx
+ or rcx,rdx
+ mov rbp,QWORD PTR[((256+16))+r14]
+ mov r9,rsi
+ mov r12,rsi
+ and r9,rax
+ and r12,rcx
+ mov rbx,r9
+ mov rdx,r12
+ shr r9,7
+ lea r8,QWORD PTR[rax*1+rax]
+ shr r12,7
+ lea r11,QWORD PTR[rcx*1+rcx]
+ sub rbx,r9
+ sub rdx,r12
+ and r8,rdi
+ and r11,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor r8,rbx
+ xor r11,rdx
+ mov r10,rsi
+ mov r13,rsi
+
+ and r10,r8
+ and r13,r11
+ mov rbx,r10
+ mov rdx,r13
+ shr r10,7
+ lea r9,QWORD PTR[r8*1+r8]
+ shr r13,7
+ lea r12,QWORD PTR[r11*1+r11]
+ sub rbx,r10
+ sub rdx,r13
+ and r9,rdi
+ and r12,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor r9,rbx
+ xor r12,rdx
+ mov r10,rsi
+ mov r13,rsi
+
+ and r10,r9
+ and r13,r12
+ mov rbx,r10
+ mov rdx,r13
+ shr r10,7
+ xor r8,rax
+ shr r13,7
+ xor r11,rcx
+ sub rbx,r10
+ sub rdx,r13
+ lea r10,QWORD PTR[r9*1+r9]
+ lea r13,QWORD PTR[r12*1+r12]
+ xor r9,rax
+ xor r12,rcx
+ and r10,rdi
+ and r13,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor r10,rbx
+ xor r13,rdx
+
+ xor rax,r10
+ xor rcx,r13
+ xor r8,r10
+ xor r11,r13
+ mov rbx,rax
+ mov rdx,rcx
+ xor r9,r10
+ shr rbx,32
+ xor r12,r13
+ shr rdx,32
+ xor r10,r8
+ rol eax,8
+ xor r13,r11
+ rol ecx,8
+ xor r10,r9
+ rol ebx,8
+ xor r13,r12
+
+ rol edx,8
+ xor eax,r10d
+ shr r10,32
+ xor ecx,r13d
+ shr r13,32
+ xor ebx,r10d
+ xor edx,r13d
+
+ mov r10,r8
+ rol r8d,24
+ mov r13,r11
+ rol r11d,24
+ shr r10,32
+ xor eax,r8d
+ shr r13,32
+ xor ecx,r11d
+ rol r10d,24
+ mov r8,r9
+ rol r13d,24
+ mov r11,r12
+ shr r8,32
+ xor ebx,r10d
+ shr r11,32
+ xor edx,r13d
+
+ mov rsi,QWORD PTR[r14]
+ rol r9d,16
+ mov rdi,QWORD PTR[64+r14]
+ rol r12d,16
+ mov rbp,QWORD PTR[128+r14]
+ rol r8d,16
+ mov r10,QWORD PTR[192+r14]
+ xor eax,r9d
+ rol r11d,16
+ xor ecx,r12d
+ mov r13,QWORD PTR[256+r14]
+ xor ebx,r8d
+ xor edx,r11d
+ jmp $L$dec_loop_compact
+ALIGN 16
+$L$dec_compact_done::
+ xor eax,DWORD PTR[r15]
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+DB 0f3h,0c3h
+_x86_64_AES_decrypt_compact ENDP
+ALIGN 16
+PUBLIC asm_AES_decrypt
+
+
+asm_AES_decrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_asm_AES_decrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+
+
+ mov r10,rsp
+ lea rcx,QWORD PTR[((-63))+rdx]
+ and rsp,-64
+ sub rcx,rsp
+ neg rcx
+ and rcx,03c0h
+ sub rsp,rcx
+ sub rsp,32
+
+ mov QWORD PTR[16+rsp],rsi
+ mov QWORD PTR[24+rsp],r10
+$L$dec_prologue::
+
+ mov r15,rdx
+ mov r13d,DWORD PTR[240+r15]
+
+ mov eax,DWORD PTR[rdi]
+ mov ebx,DWORD PTR[4+rdi]
+ mov ecx,DWORD PTR[8+rdi]
+ mov edx,DWORD PTR[12+rdi]
+
+ shl r13d,4
+ lea rbp,QWORD PTR[r13*1+r15]
+ mov QWORD PTR[rsp],r15
+ mov QWORD PTR[8+rsp],rbp
+
+
+ lea r14,QWORD PTR[(($L$AES_Td+2048))]
+ lea rbp,QWORD PTR[768+rsp]
+ sub rbp,r14
+ and rbp,0300h
+ lea r14,QWORD PTR[rbp*1+r14]
+ shr rbp,3
+ add r14,rbp
+
+ call _x86_64_AES_decrypt_compact
+
+ mov r9,QWORD PTR[16+rsp]
+ mov rsi,QWORD PTR[24+rsp]
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$dec_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_asm_AES_decrypt::
+asm_AES_decrypt ENDP
+ALIGN 16
+PUBLIC asm_AES_set_encrypt_key
+
+asm_AES_set_encrypt_key PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_asm_AES_set_encrypt_key::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp,8
+$L$enc_key_prologue::
+
+ call _x86_64_AES_set_encrypt_key
+
+ mov rbp,QWORD PTR[40+rsp]
+ mov rbx,QWORD PTR[48+rsp]
+ add rsp,56
+$L$enc_key_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_asm_AES_set_encrypt_key::
+asm_AES_set_encrypt_key ENDP
+
+
+ALIGN 16
+_x86_64_AES_set_encrypt_key PROC PRIVATE
+ mov ecx,esi
+ mov rsi,rdi
+ mov rdi,rdx
+
+ test rsi,-1
+ jz $L$badpointer
+ test rdi,-1
+ jz $L$badpointer
+
+ lea rbp,QWORD PTR[$L$AES_Te]
+ lea rbp,QWORD PTR[((2048+128))+rbp]
+
+
+ mov eax,DWORD PTR[((0-128))+rbp]
+ mov ebx,DWORD PTR[((32-128))+rbp]
+ mov r8d,DWORD PTR[((64-128))+rbp]
+ mov edx,DWORD PTR[((96-128))+rbp]
+ mov eax,DWORD PTR[((128-128))+rbp]
+ mov ebx,DWORD PTR[((160-128))+rbp]
+ mov r8d,DWORD PTR[((192-128))+rbp]
+ mov edx,DWORD PTR[((224-128))+rbp]
+
+ cmp ecx,128
+ je $L$10rounds
+ cmp ecx,192
+ je $L$12rounds
+ cmp ecx,256
+ je $L$14rounds
+ mov rax,-2
+ jmp $L$exit
+
+$L$10rounds::
+ mov rax,QWORD PTR[rsi]
+ mov rdx,QWORD PTR[8+rsi]
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rdx
+
+ shr rdx,32
+ xor ecx,ecx
+ jmp $L$10shortcut
+ALIGN 4
+$L$10loop::
+ mov eax,DWORD PTR[rdi]
+ mov edx,DWORD PTR[12+rdi]
+$L$10shortcut::
+ movzx esi,dl
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,24
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shr edx,16
+ movzx esi,dl
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,8
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shl ebx,16
+ xor eax,ebx
+
+ xor eax,DWORD PTR[((1024-128))+rcx*4+rbp]
+ mov DWORD PTR[16+rdi],eax
+ xor eax,DWORD PTR[4+rdi]
+ mov DWORD PTR[20+rdi],eax
+ xor eax,DWORD PTR[8+rdi]
+ mov DWORD PTR[24+rdi],eax
+ xor eax,DWORD PTR[12+rdi]
+ mov DWORD PTR[28+rdi],eax
+ add ecx,1
+ lea rdi,QWORD PTR[16+rdi]
+ cmp ecx,10
+ jl $L$10loop
+
+ mov DWORD PTR[80+rdi],10
+ xor rax,rax
+ jmp $L$exit
+
+$L$12rounds::
+ mov rax,QWORD PTR[rsi]
+ mov rbx,QWORD PTR[8+rsi]
+ mov rdx,QWORD PTR[16+rsi]
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rbx
+ mov QWORD PTR[16+rdi],rdx
+
+ shr rdx,32
+ xor ecx,ecx
+ jmp $L$12shortcut
+ALIGN 4
+$L$12loop::
+ mov eax,DWORD PTR[rdi]
+ mov edx,DWORD PTR[20+rdi]
+$L$12shortcut::
+ movzx esi,dl
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,24
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shr edx,16
+ movzx esi,dl
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,8
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shl ebx,16
+ xor eax,ebx
+
+ xor eax,DWORD PTR[((1024-128))+rcx*4+rbp]
+ mov DWORD PTR[24+rdi],eax
+ xor eax,DWORD PTR[4+rdi]
+ mov DWORD PTR[28+rdi],eax
+ xor eax,DWORD PTR[8+rdi]
+ mov DWORD PTR[32+rdi],eax
+ xor eax,DWORD PTR[12+rdi]
+ mov DWORD PTR[36+rdi],eax
+
+ cmp ecx,7
+ je $L$12break
+ add ecx,1
+
+ xor eax,DWORD PTR[16+rdi]
+ mov DWORD PTR[40+rdi],eax
+ xor eax,DWORD PTR[20+rdi]
+ mov DWORD PTR[44+rdi],eax
+
+ lea rdi,QWORD PTR[24+rdi]
+ jmp $L$12loop
+$L$12break::
+ mov DWORD PTR[72+rdi],12
+ xor rax,rax
+ jmp $L$exit
+
+$L$14rounds::
+ mov rax,QWORD PTR[rsi]
+ mov rbx,QWORD PTR[8+rsi]
+ mov rcx,QWORD PTR[16+rsi]
+ mov rdx,QWORD PTR[24+rsi]
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rbx
+ mov QWORD PTR[16+rdi],rcx
+ mov QWORD PTR[24+rdi],rdx
+
+ shr rdx,32
+ xor ecx,ecx
+ jmp $L$14shortcut
+ALIGN 4
+$L$14loop::
+ mov eax,DWORD PTR[rdi]
+ mov edx,DWORD PTR[28+rdi]
+$L$14shortcut::
+ movzx esi,dl
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,24
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shr edx,16
+ movzx esi,dl
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,8
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shl ebx,16
+ xor eax,ebx
+
+ xor eax,DWORD PTR[((1024-128))+rcx*4+rbp]
+ mov DWORD PTR[32+rdi],eax
+ xor eax,DWORD PTR[4+rdi]
+ mov DWORD PTR[36+rdi],eax
+ xor eax,DWORD PTR[8+rdi]
+ mov DWORD PTR[40+rdi],eax
+ xor eax,DWORD PTR[12+rdi]
+ mov DWORD PTR[44+rdi],eax
+
+ cmp ecx,6
+ je $L$14break
+ add ecx,1
+
+ mov edx,eax
+ mov eax,DWORD PTR[16+rdi]
+ movzx esi,dl
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shr edx,16
+ shl ebx,8
+ movzx esi,dl
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,16
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shl ebx,24
+ xor eax,ebx
+
+ mov DWORD PTR[48+rdi],eax
+ xor eax,DWORD PTR[20+rdi]
+ mov DWORD PTR[52+rdi],eax
+ xor eax,DWORD PTR[24+rdi]
+ mov DWORD PTR[56+rdi],eax
+ xor eax,DWORD PTR[28+rdi]
+ mov DWORD PTR[60+rdi],eax
+
+ lea rdi,QWORD PTR[32+rdi]
+ jmp $L$14loop
+$L$14break::
+ mov DWORD PTR[48+rdi],14
+ xor rax,rax
+ jmp $L$exit
+
+$L$badpointer::
+ mov rax,-1
+$L$exit::
+DB 0f3h,0c3h
+_x86_64_AES_set_encrypt_key ENDP
+ALIGN 16
+PUBLIC asm_AES_set_decrypt_key
+
+asm_AES_set_decrypt_key PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_asm_AES_set_decrypt_key::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ push rdx
+$L$dec_key_prologue::
+
+ call _x86_64_AES_set_encrypt_key
+ mov r8,QWORD PTR[rsp]
+ cmp eax,0
+ jne $L$abort
+
+ mov r14d,DWORD PTR[240+r8]
+ xor rdi,rdi
+ lea rcx,QWORD PTR[r14*4+rdi]
+ mov rsi,r8
+ lea rdi,QWORD PTR[rcx*4+r8]
+ALIGN 4
+$L$invert::
+ mov rax,QWORD PTR[rsi]
+ mov rbx,QWORD PTR[8+rsi]
+ mov rcx,QWORD PTR[rdi]
+ mov rdx,QWORD PTR[8+rdi]
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rbx
+ mov QWORD PTR[rsi],rcx
+ mov QWORD PTR[8+rsi],rdx
+ lea rsi,QWORD PTR[16+rsi]
+ lea rdi,QWORD PTR[((-16))+rdi]
+ cmp rdi,rsi
+ jne $L$invert
+
+ lea rax,QWORD PTR[(($L$AES_Te+2048+1024))]
+
+ mov rsi,QWORD PTR[40+rax]
+ mov rdi,QWORD PTR[48+rax]
+ mov rbp,QWORD PTR[56+rax]
+
+ mov r15,r8
+ sub r14d,1
+ALIGN 4
+$L$permute::
+ lea r15,QWORD PTR[16+r15]
+ mov rax,QWORD PTR[r15]
+ mov rcx,QWORD PTR[8+r15]
+ mov r9,rsi
+ mov r12,rsi
+ and r9,rax
+ and r12,rcx
+ mov rbx,r9
+ mov rdx,r12
+ shr r9,7
+ lea r8,QWORD PTR[rax*1+rax]
+ shr r12,7
+ lea r11,QWORD PTR[rcx*1+rcx]
+ sub rbx,r9
+ sub rdx,r12
+ and r8,rdi
+ and r11,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor r8,rbx
+ xor r11,rdx
+ mov r10,rsi
+ mov r13,rsi
+
+ and r10,r8
+ and r13,r11
+ mov rbx,r10
+ mov rdx,r13
+ shr r10,7
+ lea r9,QWORD PTR[r8*1+r8]
+ shr r13,7
+ lea r12,QWORD PTR[r11*1+r11]
+ sub rbx,r10
+ sub rdx,r13
+ and r9,rdi
+ and r12,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor r9,rbx
+ xor r12,rdx
+ mov r10,rsi
+ mov r13,rsi
+
+ and r10,r9
+ and r13,r12
+ mov rbx,r10
+ mov rdx,r13
+ shr r10,7
+ xor r8,rax
+ shr r13,7
+ xor r11,rcx
+ sub rbx,r10
+ sub rdx,r13
+ lea r10,QWORD PTR[r9*1+r9]
+ lea r13,QWORD PTR[r12*1+r12]
+ xor r9,rax
+ xor r12,rcx
+ and r10,rdi
+ and r13,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor r10,rbx
+ xor r13,rdx
+
+ xor rax,r10
+ xor rcx,r13
+ xor r8,r10
+ xor r11,r13
+ mov rbx,rax
+ mov rdx,rcx
+ xor r9,r10
+ shr rbx,32
+ xor r12,r13
+ shr rdx,32
+ xor r10,r8
+ rol eax,8
+ xor r13,r11
+ rol ecx,8
+ xor r10,r9
+ rol ebx,8
+ xor r13,r12
+
+ rol edx,8
+ xor eax,r10d
+ shr r10,32
+ xor ecx,r13d
+ shr r13,32
+ xor ebx,r10d
+ xor edx,r13d
+
+ mov r10,r8
+ rol r8d,24
+ mov r13,r11
+ rol r11d,24
+ shr r10,32
+ xor eax,r8d
+ shr r13,32
+ xor ecx,r11d
+ rol r10d,24
+ mov r8,r9
+ rol r13d,24
+ mov r11,r12
+ shr r8,32
+ xor ebx,r10d
+ shr r11,32
+ xor edx,r13d
+
+
+ rol r9d,16
+
+ rol r12d,16
+
+ rol r8d,16
+
+ xor eax,r9d
+ rol r11d,16
+ xor ecx,r12d
+
+ xor ebx,r8d
+ xor edx,r11d
+ mov DWORD PTR[r15],eax
+ mov DWORD PTR[4+r15],ebx
+ mov DWORD PTR[8+r15],ecx
+ mov DWORD PTR[12+r15],edx
+ sub r14d,1
+ jnz $L$permute
+
+ xor rax,rax
+$L$abort::
+ mov r15,QWORD PTR[8+rsp]
+ mov r14,QWORD PTR[16+rsp]
+ mov r13,QWORD PTR[24+rsp]
+ mov r12,QWORD PTR[32+rsp]
+ mov rbp,QWORD PTR[40+rsp]
+ mov rbx,QWORD PTR[48+rsp]
+ add rsp,56
+$L$dec_key_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_asm_AES_set_decrypt_key::
+asm_AES_set_decrypt_key ENDP
+ALIGN 16
+PUBLIC asm_AES_cbc_encrypt
+
+EXTERN OPENSSL_ia32cap_P:NEAR
+
+asm_AES_cbc_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_asm_AES_cbc_encrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ cmp rdx,0
+ je $L$cbc_epilogue
+ pushfq
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+$L$cbc_prologue::
+
+ cld
+ mov r9d,r9d
+
+ lea r14,QWORD PTR[$L$AES_Te]
+ cmp r9,0
+ jne $L$cbc_picked_te
+ lea r14,QWORD PTR[$L$AES_Td]
+$L$cbc_picked_te::
+
+ mov r10d,DWORD PTR[OPENSSL_ia32cap_P]
+ cmp rdx,512
+ jb $L$cbc_slow_prologue
+ test rdx,15
+ jnz $L$cbc_slow_prologue
+ bt r10d,28
+ jc $L$cbc_slow_prologue
+
+
+ lea r15,QWORD PTR[((-88-248))+rsp]
+ and r15,-64
+
+
+ mov r10,r14
+ lea r11,QWORD PTR[2304+r14]
+ mov r12,r15
+ and r10,0FFFh
+ and r11,0FFFh
+ and r12,0FFFh
+
+ cmp r12,r11
+ jb $L$cbc_te_break_out
+ sub r12,r11
+ sub r15,r12
+ jmp $L$cbc_te_ok
+$L$cbc_te_break_out::
+ sub r12,r10
+ and r12,0FFFh
+ add r12,320
+ sub r15,r12
+ALIGN 4
+$L$cbc_te_ok::
+
+ xchg r15,rsp
+
+ mov QWORD PTR[16+rsp],r15
+$L$cbc_fast_body::
+ mov QWORD PTR[24+rsp],rdi
+ mov QWORD PTR[32+rsp],rsi
+ mov QWORD PTR[40+rsp],rdx
+ mov QWORD PTR[48+rsp],rcx
+ mov QWORD PTR[56+rsp],r8
+ mov DWORD PTR[((80+240))+rsp],0
+ mov rbp,r8
+ mov rbx,r9
+ mov r9,rsi
+ mov r8,rdi
+ mov r15,rcx
+
+ mov eax,DWORD PTR[240+r15]
+
+ mov r10,r15
+ sub r10,r14
+ and r10,0fffh
+ cmp r10,2304
+ jb $L$cbc_do_ecopy
+ cmp r10,4096-248
+ jb $L$cbc_skip_ecopy
+ALIGN 4
+$L$cbc_do_ecopy::
+ mov rsi,r15
+ lea rdi,QWORD PTR[80+rsp]
+ lea r15,QWORD PTR[80+rsp]
+ mov ecx,240/8
+ DD 090A548F3h
+ mov DWORD PTR[rdi],eax
+$L$cbc_skip_ecopy::
+ mov QWORD PTR[rsp],r15
+
+ mov ecx,18
+ALIGN 4
+$L$cbc_prefetch_te::
+ mov r10,QWORD PTR[r14]
+ mov r11,QWORD PTR[32+r14]
+ mov r12,QWORD PTR[64+r14]
+ mov r13,QWORD PTR[96+r14]
+ lea r14,QWORD PTR[128+r14]
+ sub ecx,1
+ jnz $L$cbc_prefetch_te
+ lea r14,QWORD PTR[((-2304))+r14]
+
+ cmp rbx,0
+ je $L$FAST_DECRYPT
+
+
+ mov eax,DWORD PTR[rbp]
+ mov ebx,DWORD PTR[4+rbp]
+ mov ecx,DWORD PTR[8+rbp]
+ mov edx,DWORD PTR[12+rbp]
+
+ALIGN 4
+$L$cbc_fast_enc_loop::
+ xor eax,DWORD PTR[r8]
+ xor ebx,DWORD PTR[4+r8]
+ xor ecx,DWORD PTR[8+r8]
+ xor edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8
+
+ call _x86_64_AES_encrypt
+
+ mov r8,QWORD PTR[24+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ sub r10,16
+ test r10,-16
+ mov QWORD PTR[40+rsp],r10
+ jnz $L$cbc_fast_enc_loop
+ mov rbp,QWORD PTR[56+rsp]
+ mov DWORD PTR[rbp],eax
+ mov DWORD PTR[4+rbp],ebx
+ mov DWORD PTR[8+rbp],ecx
+ mov DWORD PTR[12+rbp],edx
+
+ jmp $L$cbc_fast_cleanup
+
+
+ALIGN 16
+$L$FAST_DECRYPT::
+ cmp r9,r8
+ je $L$cbc_fast_dec_in_place
+
+ mov QWORD PTR[64+rsp],rbp
+ALIGN 4
+$L$cbc_fast_dec_loop::
+ mov eax,DWORD PTR[r8]
+ mov ebx,DWORD PTR[4+r8]
+ mov ecx,DWORD PTR[8+r8]
+ mov edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8
+
+ call _x86_64_AES_decrypt
+
+ mov rbp,QWORD PTR[64+rsp]
+ mov r8,QWORD PTR[24+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ xor eax,DWORD PTR[rbp]
+ xor ebx,DWORD PTR[4+rbp]
+ xor ecx,DWORD PTR[8+rbp]
+ xor edx,DWORD PTR[12+rbp]
+ mov rbp,r8
+
+ sub r10,16
+ mov QWORD PTR[40+rsp],r10
+ mov QWORD PTR[64+rsp],rbp
+
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ jnz $L$cbc_fast_dec_loop
+ mov r12,QWORD PTR[56+rsp]
+ mov r10,QWORD PTR[rbp]
+ mov r11,QWORD PTR[8+rbp]
+ mov QWORD PTR[r12],r10
+ mov QWORD PTR[8+r12],r11
+ jmp $L$cbc_fast_cleanup
+
+ALIGN 16
+$L$cbc_fast_dec_in_place::
+ mov r10,QWORD PTR[rbp]
+ mov r11,QWORD PTR[8+rbp]
+ mov QWORD PTR[((0+64))+rsp],r10
+ mov QWORD PTR[((8+64))+rsp],r11
+ALIGN 4
+$L$cbc_fast_dec_in_place_loop::
+ mov eax,DWORD PTR[r8]
+ mov ebx,DWORD PTR[4+r8]
+ mov ecx,DWORD PTR[8+r8]
+ mov edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8
+
+ call _x86_64_AES_decrypt
+
+ mov r8,QWORD PTR[24+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ xor eax,DWORD PTR[((0+64))+rsp]
+ xor ebx,DWORD PTR[((4+64))+rsp]
+ xor ecx,DWORD PTR[((8+64))+rsp]
+ xor edx,DWORD PTR[((12+64))+rsp]
+
+ mov r11,QWORD PTR[r8]
+ mov r12,QWORD PTR[8+r8]
+ sub r10,16
+ jz $L$cbc_fast_dec_in_place_done
+
+ mov QWORD PTR[((0+64))+rsp],r11
+ mov QWORD PTR[((8+64))+rsp],r12
+
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ mov QWORD PTR[40+rsp],r10
+ jmp $L$cbc_fast_dec_in_place_loop
+$L$cbc_fast_dec_in_place_done::
+ mov rdi,QWORD PTR[56+rsp]
+ mov QWORD PTR[rdi],r11
+ mov QWORD PTR[8+rdi],r12
+
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ALIGN 4
+$L$cbc_fast_cleanup::
+ cmp DWORD PTR[((80+240))+rsp],0
+ lea rdi,QWORD PTR[80+rsp]
+ je $L$cbc_exit
+ mov ecx,240/8
+ xor rax,rax
+ DD 090AB48F3h
+
+ jmp $L$cbc_exit
+
+
+ALIGN 16
+$L$cbc_slow_prologue::
+
+ lea rbp,QWORD PTR[((-88))+rsp]
+ and rbp,-64
+
+ lea r10,QWORD PTR[((-88-63))+rcx]
+ sub r10,rbp
+ neg r10
+ and r10,03c0h
+ sub rbp,r10
+
+ xchg rbp,rsp
+
+ mov QWORD PTR[16+rsp],rbp
+$L$cbc_slow_body::
+
+
+
+
+ mov QWORD PTR[56+rsp],r8
+ mov rbp,r8
+ mov rbx,r9
+ mov r9,rsi
+ mov r8,rdi
+ mov r15,rcx
+ mov r10,rdx
+
+ mov eax,DWORD PTR[240+r15]
+ mov QWORD PTR[rsp],r15
+ shl eax,4
+ lea rax,QWORD PTR[rax*1+r15]
+ mov QWORD PTR[8+rsp],rax
+
+
+ lea r14,QWORD PTR[2048+r14]
+ lea rax,QWORD PTR[((768-8))+rsp]
+ sub rax,r14
+ and rax,0300h
+ lea r14,QWORD PTR[rax*1+r14]
+
+ cmp rbx,0
+ je $L$SLOW_DECRYPT
+
+
+ test r10,-16
+ mov eax,DWORD PTR[rbp]
+ mov ebx,DWORD PTR[4+rbp]
+ mov ecx,DWORD PTR[8+rbp]
+ mov edx,DWORD PTR[12+rbp]
+ jz $L$cbc_slow_enc_tail
+
+ALIGN 4
+$L$cbc_slow_enc_loop::
+ xor eax,DWORD PTR[r8]
+ xor ebx,DWORD PTR[4+r8]
+ xor ecx,DWORD PTR[8+r8]
+ xor edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8
+ mov QWORD PTR[32+rsp],r9
+ mov QWORD PTR[40+rsp],r10
+
+ call _x86_64_AES_encrypt_compact
+
+ mov r8,QWORD PTR[24+rsp]
+ mov r9,QWORD PTR[32+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ sub r10,16
+ test r10,-16
+ jnz $L$cbc_slow_enc_loop
+ test r10,15
+ jnz $L$cbc_slow_enc_tail
+ mov rbp,QWORD PTR[56+rsp]
+ mov DWORD PTR[rbp],eax
+ mov DWORD PTR[4+rbp],ebx
+ mov DWORD PTR[8+rbp],ecx
+ mov DWORD PTR[12+rbp],edx
+
+ jmp $L$cbc_exit
+
+ALIGN 4
+$L$cbc_slow_enc_tail::
+ mov r11,rax
+ mov r12,rcx
+ mov rcx,r10
+ mov rsi,r8
+ mov rdi,r9
+ DD 09066A4F3h
+ mov rcx,16
+ sub rcx,r10
+ xor rax,rax
+ DD 09066AAF3h
+ mov r8,r9
+ mov r10,16
+ mov rax,r11
+ mov rcx,r12
+ jmp $L$cbc_slow_enc_loop
+
+ALIGN 16
+$L$SLOW_DECRYPT::
+ shr rax,3
+ add r14,rax
+
+ mov r11,QWORD PTR[rbp]
+ mov r12,QWORD PTR[8+rbp]
+ mov QWORD PTR[((0+64))+rsp],r11
+ mov QWORD PTR[((8+64))+rsp],r12
+
+ALIGN 4
+$L$cbc_slow_dec_loop::
+ mov eax,DWORD PTR[r8]
+ mov ebx,DWORD PTR[4+r8]
+ mov ecx,DWORD PTR[8+r8]
+ mov edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8
+ mov QWORD PTR[32+rsp],r9
+ mov QWORD PTR[40+rsp],r10
+
+ call _x86_64_AES_decrypt_compact
+
+ mov r8,QWORD PTR[24+rsp]
+ mov r9,QWORD PTR[32+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ xor eax,DWORD PTR[((0+64))+rsp]
+ xor ebx,DWORD PTR[((4+64))+rsp]
+ xor ecx,DWORD PTR[((8+64))+rsp]
+ xor edx,DWORD PTR[((12+64))+rsp]
+
+ mov r11,QWORD PTR[r8]
+ mov r12,QWORD PTR[8+r8]
+ sub r10,16
+ jc $L$cbc_slow_dec_partial
+ jz $L$cbc_slow_dec_done
+
+ mov QWORD PTR[((0+64))+rsp],r11
+ mov QWORD PTR[((8+64))+rsp],r12
+
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ jmp $L$cbc_slow_dec_loop
+$L$cbc_slow_dec_done::
+ mov rdi,QWORD PTR[56+rsp]
+ mov QWORD PTR[rdi],r11
+ mov QWORD PTR[8+rdi],r12
+
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ jmp $L$cbc_exit
+
+ALIGN 4
+$L$cbc_slow_dec_partial::
+ mov rdi,QWORD PTR[56+rsp]
+ mov QWORD PTR[rdi],r11
+ mov QWORD PTR[8+rdi],r12
+
+ mov DWORD PTR[((0+64))+rsp],eax
+ mov DWORD PTR[((4+64))+rsp],ebx
+ mov DWORD PTR[((8+64))+rsp],ecx
+ mov DWORD PTR[((12+64))+rsp],edx
+
+ mov rdi,r9
+ lea rsi,QWORD PTR[64+rsp]
+ lea rcx,QWORD PTR[16+r10]
+ DD 09066A4F3h
+ jmp $L$cbc_exit
+
+ALIGN 16
+$L$cbc_exit::
+ mov rsi,QWORD PTR[16+rsp]
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$cbc_popfq::
+ popfq
+$L$cbc_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_asm_AES_cbc_encrypt::
+asm_AES_cbc_encrypt ENDP
+ALIGN 64
+$L$AES_Te::
+ DD 0a56363c6h,0a56363c6h
+ DD 0847c7cf8h,0847c7cf8h
+ DD 0997777eeh,0997777eeh
+ DD 08d7b7bf6h,08d7b7bf6h
+ DD 00df2f2ffh,00df2f2ffh
+ DD 0bd6b6bd6h,0bd6b6bd6h
+ DD 0b16f6fdeh,0b16f6fdeh
+ DD 054c5c591h,054c5c591h
+ DD 050303060h,050303060h
+ DD 003010102h,003010102h
+ DD 0a96767ceh,0a96767ceh
+ DD 07d2b2b56h,07d2b2b56h
+ DD 019fefee7h,019fefee7h
+ DD 062d7d7b5h,062d7d7b5h
+ DD 0e6abab4dh,0e6abab4dh
+ DD 09a7676ech,09a7676ech
+ DD 045caca8fh,045caca8fh
+ DD 09d82821fh,09d82821fh
+ DD 040c9c989h,040c9c989h
+ DD 0877d7dfah,0877d7dfah
+ DD 015fafaefh,015fafaefh
+ DD 0eb5959b2h,0eb5959b2h
+ DD 0c947478eh,0c947478eh
+ DD 00bf0f0fbh,00bf0f0fbh
+ DD 0ecadad41h,0ecadad41h
+ DD 067d4d4b3h,067d4d4b3h
+ DD 0fda2a25fh,0fda2a25fh
+ DD 0eaafaf45h,0eaafaf45h
+ DD 0bf9c9c23h,0bf9c9c23h
+ DD 0f7a4a453h,0f7a4a453h
+ DD 0967272e4h,0967272e4h
+ DD 05bc0c09bh,05bc0c09bh
+ DD 0c2b7b775h,0c2b7b775h
+ DD 01cfdfde1h,01cfdfde1h
+ DD 0ae93933dh,0ae93933dh
+ DD 06a26264ch,06a26264ch
+ DD 05a36366ch,05a36366ch
+ DD 0413f3f7eh,0413f3f7eh
+ DD 002f7f7f5h,002f7f7f5h
+ DD 04fcccc83h,04fcccc83h
+ DD 05c343468h,05c343468h
+ DD 0f4a5a551h,0f4a5a551h
+ DD 034e5e5d1h,034e5e5d1h
+ DD 008f1f1f9h,008f1f1f9h
+ DD 0937171e2h,0937171e2h
+ DD 073d8d8abh,073d8d8abh
+ DD 053313162h,053313162h
+ DD 03f15152ah,03f15152ah
+ DD 00c040408h,00c040408h
+ DD 052c7c795h,052c7c795h
+ DD 065232346h,065232346h
+ DD 05ec3c39dh,05ec3c39dh
+ DD 028181830h,028181830h
+ DD 0a1969637h,0a1969637h
+ DD 00f05050ah,00f05050ah
+ DD 0b59a9a2fh,0b59a9a2fh
+ DD 00907070eh,00907070eh
+ DD 036121224h,036121224h
+ DD 09b80801bh,09b80801bh
+ DD 03de2e2dfh,03de2e2dfh
+ DD 026ebebcdh,026ebebcdh
+ DD 06927274eh,06927274eh
+ DD 0cdb2b27fh,0cdb2b27fh
+ DD 09f7575eah,09f7575eah
+ DD 01b090912h,01b090912h
+ DD 09e83831dh,09e83831dh
+ DD 0742c2c58h,0742c2c58h
+ DD 02e1a1a34h,02e1a1a34h
+ DD 02d1b1b36h,02d1b1b36h
+ DD 0b26e6edch,0b26e6edch
+ DD 0ee5a5ab4h,0ee5a5ab4h
+ DD 0fba0a05bh,0fba0a05bh
+ DD 0f65252a4h,0f65252a4h
+ DD 04d3b3b76h,04d3b3b76h
+ DD 061d6d6b7h,061d6d6b7h
+ DD 0ceb3b37dh,0ceb3b37dh
+ DD 07b292952h,07b292952h
+ DD 03ee3e3ddh,03ee3e3ddh
+ DD 0712f2f5eh,0712f2f5eh
+ DD 097848413h,097848413h
+ DD 0f55353a6h,0f55353a6h
+ DD 068d1d1b9h,068d1d1b9h
+ DD 000000000h,000000000h
+ DD 02cededc1h,02cededc1h
+ DD 060202040h,060202040h
+ DD 01ffcfce3h,01ffcfce3h
+ DD 0c8b1b179h,0c8b1b179h
+ DD 0ed5b5bb6h,0ed5b5bb6h
+ DD 0be6a6ad4h,0be6a6ad4h
+ DD 046cbcb8dh,046cbcb8dh
+ DD 0d9bebe67h,0d9bebe67h
+ DD 04b393972h,04b393972h
+ DD 0de4a4a94h,0de4a4a94h
+ DD 0d44c4c98h,0d44c4c98h
+ DD 0e85858b0h,0e85858b0h
+ DD 04acfcf85h,04acfcf85h
+ DD 06bd0d0bbh,06bd0d0bbh
+ DD 02aefefc5h,02aefefc5h
+ DD 0e5aaaa4fh,0e5aaaa4fh
+ DD 016fbfbedh,016fbfbedh
+ DD 0c5434386h,0c5434386h
+ DD 0d74d4d9ah,0d74d4d9ah
+ DD 055333366h,055333366h
+ DD 094858511h,094858511h
+ DD 0cf45458ah,0cf45458ah
+ DD 010f9f9e9h,010f9f9e9h
+ DD 006020204h,006020204h
+ DD 0817f7ffeh,0817f7ffeh
+ DD 0f05050a0h,0f05050a0h
+ DD 0443c3c78h,0443c3c78h
+ DD 0ba9f9f25h,0ba9f9f25h
+ DD 0e3a8a84bh,0e3a8a84bh
+ DD 0f35151a2h,0f35151a2h
+ DD 0fea3a35dh,0fea3a35dh
+ DD 0c0404080h,0c0404080h
+ DD 08a8f8f05h,08a8f8f05h
+ DD 0ad92923fh,0ad92923fh
+ DD 0bc9d9d21h,0bc9d9d21h
+ DD 048383870h,048383870h
+ DD 004f5f5f1h,004f5f5f1h
+ DD 0dfbcbc63h,0dfbcbc63h
+ DD 0c1b6b677h,0c1b6b677h
+ DD 075dadaafh,075dadaafh
+ DD 063212142h,063212142h
+ DD 030101020h,030101020h
+ DD 01affffe5h,01affffe5h
+ DD 00ef3f3fdh,00ef3f3fdh
+ DD 06dd2d2bfh,06dd2d2bfh
+ DD 04ccdcd81h,04ccdcd81h
+ DD 0140c0c18h,0140c0c18h
+ DD 035131326h,035131326h
+ DD 02fececc3h,02fececc3h
+ DD 0e15f5fbeh,0e15f5fbeh
+ DD 0a2979735h,0a2979735h
+ DD 0cc444488h,0cc444488h
+ DD 03917172eh,03917172eh
+ DD 057c4c493h,057c4c493h
+ DD 0f2a7a755h,0f2a7a755h
+ DD 0827e7efch,0827e7efch
+ DD 0473d3d7ah,0473d3d7ah
+ DD 0ac6464c8h,0ac6464c8h
+ DD 0e75d5dbah,0e75d5dbah
+ DD 02b191932h,02b191932h
+ DD 0957373e6h,0957373e6h
+ DD 0a06060c0h,0a06060c0h
+ DD 098818119h,098818119h
+ DD 0d14f4f9eh,0d14f4f9eh
+ DD 07fdcdca3h,07fdcdca3h
+ DD 066222244h,066222244h
+ DD 07e2a2a54h,07e2a2a54h
+ DD 0ab90903bh,0ab90903bh
+ DD 08388880bh,08388880bh
+ DD 0ca46468ch,0ca46468ch
+ DD 029eeeec7h,029eeeec7h
+ DD 0d3b8b86bh,0d3b8b86bh
+ DD 03c141428h,03c141428h
+ DD 079dedea7h,079dedea7h
+ DD 0e25e5ebch,0e25e5ebch
+ DD 01d0b0b16h,01d0b0b16h
+ DD 076dbdbadh,076dbdbadh
+ DD 03be0e0dbh,03be0e0dbh
+ DD 056323264h,056323264h
+ DD 04e3a3a74h,04e3a3a74h
+ DD 01e0a0a14h,01e0a0a14h
+ DD 0db494992h,0db494992h
+ DD 00a06060ch,00a06060ch
+ DD 06c242448h,06c242448h
+ DD 0e45c5cb8h,0e45c5cb8h
+ DD 05dc2c29fh,05dc2c29fh
+ DD 06ed3d3bdh,06ed3d3bdh
+ DD 0efacac43h,0efacac43h
+ DD 0a66262c4h,0a66262c4h
+ DD 0a8919139h,0a8919139h
+ DD 0a4959531h,0a4959531h
+ DD 037e4e4d3h,037e4e4d3h
+ DD 08b7979f2h,08b7979f2h
+ DD 032e7e7d5h,032e7e7d5h
+ DD 043c8c88bh,043c8c88bh
+ DD 05937376eh,05937376eh
+ DD 0b76d6ddah,0b76d6ddah
+ DD 08c8d8d01h,08c8d8d01h
+ DD 064d5d5b1h,064d5d5b1h
+ DD 0d24e4e9ch,0d24e4e9ch
+ DD 0e0a9a949h,0e0a9a949h
+ DD 0b46c6cd8h,0b46c6cd8h
+ DD 0fa5656ach,0fa5656ach
+ DD 007f4f4f3h,007f4f4f3h
+ DD 025eaeacfh,025eaeacfh
+ DD 0af6565cah,0af6565cah
+ DD 08e7a7af4h,08e7a7af4h
+ DD 0e9aeae47h,0e9aeae47h
+ DD 018080810h,018080810h
+ DD 0d5baba6fh,0d5baba6fh
+ DD 0887878f0h,0887878f0h
+ DD 06f25254ah,06f25254ah
+ DD 0722e2e5ch,0722e2e5ch
+ DD 0241c1c38h,0241c1c38h
+ DD 0f1a6a657h,0f1a6a657h
+ DD 0c7b4b473h,0c7b4b473h
+ DD 051c6c697h,051c6c697h
+ DD 023e8e8cbh,023e8e8cbh
+ DD 07cdddda1h,07cdddda1h
+ DD 09c7474e8h,09c7474e8h
+ DD 0211f1f3eh,0211f1f3eh
+ DD 0dd4b4b96h,0dd4b4b96h
+ DD 0dcbdbd61h,0dcbdbd61h
+ DD 0868b8b0dh,0868b8b0dh
+ DD 0858a8a0fh,0858a8a0fh
+ DD 0907070e0h,0907070e0h
+ DD 0423e3e7ch,0423e3e7ch
+ DD 0c4b5b571h,0c4b5b571h
+ DD 0aa6666cch,0aa6666cch
+ DD 0d8484890h,0d8484890h
+ DD 005030306h,005030306h
+ DD 001f6f6f7h,001f6f6f7h
+ DD 0120e0e1ch,0120e0e1ch
+ DD 0a36161c2h,0a36161c2h
+ DD 05f35356ah,05f35356ah
+ DD 0f95757aeh,0f95757aeh
+ DD 0d0b9b969h,0d0b9b969h
+ DD 091868617h,091868617h
+ DD 058c1c199h,058c1c199h
+ DD 0271d1d3ah,0271d1d3ah
+ DD 0b99e9e27h,0b99e9e27h
+ DD 038e1e1d9h,038e1e1d9h
+ DD 013f8f8ebh,013f8f8ebh
+ DD 0b398982bh,0b398982bh
+ DD 033111122h,033111122h
+ DD 0bb6969d2h,0bb6969d2h
+ DD 070d9d9a9h,070d9d9a9h
+ DD 0898e8e07h,0898e8e07h
+ DD 0a7949433h,0a7949433h
+ DD 0b69b9b2dh,0b69b9b2dh
+ DD 0221e1e3ch,0221e1e3ch
+ DD 092878715h,092878715h
+ DD 020e9e9c9h,020e9e9c9h
+ DD 049cece87h,049cece87h
+ DD 0ff5555aah,0ff5555aah
+ DD 078282850h,078282850h
+ DD 07adfdfa5h,07adfdfa5h
+ DD 08f8c8c03h,08f8c8c03h
+ DD 0f8a1a159h,0f8a1a159h
+ DD 080898909h,080898909h
+ DD 0170d0d1ah,0170d0d1ah
+ DD 0dabfbf65h,0dabfbf65h
+ DD 031e6e6d7h,031e6e6d7h
+ DD 0c6424284h,0c6424284h
+ DD 0b86868d0h,0b86868d0h
+ DD 0c3414182h,0c3414182h
+ DD 0b0999929h,0b0999929h
+ DD 0772d2d5ah,0772d2d5ah
+ DD 0110f0f1eh,0110f0f1eh
+ DD 0cbb0b07bh,0cbb0b07bh
+ DD 0fc5454a8h,0fc5454a8h
+ DD 0d6bbbb6dh,0d6bbbb6dh
+ DD 03a16162ch,03a16162ch
+DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h
+DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h
+DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h
+DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h
+DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch
+DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h
+DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah
+DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h
+DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h
+DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h
+DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh
+DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh
+DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h
+DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h
+DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h
+DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h
+DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h
+DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h
+DB 060h,081h,04fh,0dch,022h,02ah,090h,088h
+DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh
+DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch
+DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h
+DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h
+DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h
+DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h
+DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah
+DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh
+DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh
+DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h
+DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh
+DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h
+DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h
+DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h
+DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h
+DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h
+DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h
+DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch
+DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h
+DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah
+DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h
+DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h
+DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h
+DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh
+DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh
+DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h
+DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h
+DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h
+DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h
+DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h
+DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h
+DB 060h,081h,04fh,0dch,022h,02ah,090h,088h
+DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh
+DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch
+DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h
+DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h
+DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h
+DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h
+DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah
+DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh
+DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh
+DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h
+DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh
+DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h
+DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h
+DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h
+DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h
+DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h
+DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h
+DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch
+DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h
+DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah
+DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h
+DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h
+DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h
+DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh
+DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh
+DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h
+DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h
+DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h
+DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h
+DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h
+DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h
+DB 060h,081h,04fh,0dch,022h,02ah,090h,088h
+DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh
+DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch
+DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h
+DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h
+DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h
+DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h
+DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah
+DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh
+DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh
+DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h
+DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh
+DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h
+DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h
+DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h
+DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h
+DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h
+DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h
+DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch
+DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h
+DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah
+DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h
+DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h
+DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h
+DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh
+DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh
+DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h
+DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h
+DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h
+DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h
+DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h
+DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h
+DB 060h,081h,04fh,0dch,022h,02ah,090h,088h
+DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh
+DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch
+DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h
+DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h
+DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h
+DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h
+DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah
+DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh
+DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh
+DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h
+DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh
+DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h
+DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h
+ DD 000000001h,000000002h,000000004h,000000008h
+ DD 000000010h,000000020h,000000040h,000000080h
+ DD 00000001bh,000000036h,080808080h,080808080h
+ DD 0fefefefeh,0fefefefeh,01b1b1b1bh,01b1b1b1bh
+ALIGN 64
+$L$AES_Td::
+ DD 050a7f451h,050a7f451h
+ DD 05365417eh,05365417eh
+ DD 0c3a4171ah,0c3a4171ah
+ DD 0965e273ah,0965e273ah
+ DD 0cb6bab3bh,0cb6bab3bh
+ DD 0f1459d1fh,0f1459d1fh
+ DD 0ab58faach,0ab58faach
+ DD 09303e34bh,09303e34bh
+ DD 055fa3020h,055fa3020h
+ DD 0f66d76adh,0f66d76adh
+ DD 09176cc88h,09176cc88h
+ DD 0254c02f5h,0254c02f5h
+ DD 0fcd7e54fh,0fcd7e54fh
+ DD 0d7cb2ac5h,0d7cb2ac5h
+ DD 080443526h,080443526h
+ DD 08fa362b5h,08fa362b5h
+ DD 0495ab1deh,0495ab1deh
+ DD 0671bba25h,0671bba25h
+ DD 0980eea45h,0980eea45h
+ DD 0e1c0fe5dh,0e1c0fe5dh
+ DD 002752fc3h,002752fc3h
+ DD 012f04c81h,012f04c81h
+ DD 0a397468dh,0a397468dh
+ DD 0c6f9d36bh,0c6f9d36bh
+ DD 0e75f8f03h,0e75f8f03h
+ DD 0959c9215h,0959c9215h
+ DD 0eb7a6dbfh,0eb7a6dbfh
+ DD 0da595295h,0da595295h
+ DD 02d83bed4h,02d83bed4h
+ DD 0d3217458h,0d3217458h
+ DD 02969e049h,02969e049h
+ DD 044c8c98eh,044c8c98eh
+ DD 06a89c275h,06a89c275h
+ DD 078798ef4h,078798ef4h
+ DD 06b3e5899h,06b3e5899h
+ DD 0dd71b927h,0dd71b927h
+ DD 0b64fe1beh,0b64fe1beh
+ DD 017ad88f0h,017ad88f0h
+ DD 066ac20c9h,066ac20c9h
+ DD 0b43ace7dh,0b43ace7dh
+ DD 0184adf63h,0184adf63h
+ DD 082311ae5h,082311ae5h
+ DD 060335197h,060335197h
+ DD 0457f5362h,0457f5362h
+ DD 0e07764b1h,0e07764b1h
+ DD 084ae6bbbh,084ae6bbbh
+ DD 01ca081feh,01ca081feh
+ DD 0942b08f9h,0942b08f9h
+ DD 058684870h,058684870h
+ DD 019fd458fh,019fd458fh
+ DD 0876cde94h,0876cde94h
+ DD 0b7f87b52h,0b7f87b52h
+ DD 023d373abh,023d373abh
+ DD 0e2024b72h,0e2024b72h
+ DD 0578f1fe3h,0578f1fe3h
+ DD 02aab5566h,02aab5566h
+ DD 00728ebb2h,00728ebb2h
+ DD 003c2b52fh,003c2b52fh
+ DD 09a7bc586h,09a7bc586h
+ DD 0a50837d3h,0a50837d3h
+ DD 0f2872830h,0f2872830h
+ DD 0b2a5bf23h,0b2a5bf23h
+ DD 0ba6a0302h,0ba6a0302h
+ DD 05c8216edh,05c8216edh
+ DD 02b1ccf8ah,02b1ccf8ah
+ DD 092b479a7h,092b479a7h
+ DD 0f0f207f3h,0f0f207f3h
+ DD 0a1e2694eh,0a1e2694eh
+ DD 0cdf4da65h,0cdf4da65h
+ DD 0d5be0506h,0d5be0506h
+ DD 01f6234d1h,01f6234d1h
+ DD 08afea6c4h,08afea6c4h
+ DD 09d532e34h,09d532e34h
+ DD 0a055f3a2h,0a055f3a2h
+ DD 032e18a05h,032e18a05h
+ DD 075ebf6a4h,075ebf6a4h
+ DD 039ec830bh,039ec830bh
+ DD 0aaef6040h,0aaef6040h
+ DD 0069f715eh,0069f715eh
+ DD 051106ebdh,051106ebdh
+ DD 0f98a213eh,0f98a213eh
+ DD 03d06dd96h,03d06dd96h
+ DD 0ae053eddh,0ae053eddh
+ DD 046bde64dh,046bde64dh
+ DD 0b58d5491h,0b58d5491h
+ DD 0055dc471h,0055dc471h
+ DD 06fd40604h,06fd40604h
+ DD 0ff155060h,0ff155060h
+ DD 024fb9819h,024fb9819h
+ DD 097e9bdd6h,097e9bdd6h
+ DD 0cc434089h,0cc434089h
+ DD 0779ed967h,0779ed967h
+ DD 0bd42e8b0h,0bd42e8b0h
+ DD 0888b8907h,0888b8907h
+ DD 0385b19e7h,0385b19e7h
+ DD 0dbeec879h,0dbeec879h
+ DD 0470a7ca1h,0470a7ca1h
+ DD 0e90f427ch,0e90f427ch
+ DD 0c91e84f8h,0c91e84f8h
+ DD 000000000h,000000000h
+ DD 083868009h,083868009h
+ DD 048ed2b32h,048ed2b32h
+ DD 0ac70111eh,0ac70111eh
+ DD 04e725a6ch,04e725a6ch
+ DD 0fbff0efdh,0fbff0efdh
+ DD 05638850fh,05638850fh
+ DD 01ed5ae3dh,01ed5ae3dh
+ DD 027392d36h,027392d36h
+ DD 064d90f0ah,064d90f0ah
+ DD 021a65c68h,021a65c68h
+ DD 0d1545b9bh,0d1545b9bh
+ DD 03a2e3624h,03a2e3624h
+ DD 0b1670a0ch,0b1670a0ch
+ DD 00fe75793h,00fe75793h
+ DD 0d296eeb4h,0d296eeb4h
+ DD 09e919b1bh,09e919b1bh
+ DD 04fc5c080h,04fc5c080h
+ DD 0a220dc61h,0a220dc61h
+ DD 0694b775ah,0694b775ah
+ DD 0161a121ch,0161a121ch
+ DD 00aba93e2h,00aba93e2h
+ DD 0e52aa0c0h,0e52aa0c0h
+ DD 043e0223ch,043e0223ch
+ DD 01d171b12h,01d171b12h
+ DD 00b0d090eh,00b0d090eh
+ DD 0adc78bf2h,0adc78bf2h
+ DD 0b9a8b62dh,0b9a8b62dh
+ DD 0c8a91e14h,0c8a91e14h
+ DD 08519f157h,08519f157h
+ DD 04c0775afh,04c0775afh
+ DD 0bbdd99eeh,0bbdd99eeh
+ DD 0fd607fa3h,0fd607fa3h
+ DD 09f2601f7h,09f2601f7h
+ DD 0bcf5725ch,0bcf5725ch
+ DD 0c53b6644h,0c53b6644h
+ DD 0347efb5bh,0347efb5bh
+ DD 07629438bh,07629438bh
+ DD 0dcc623cbh,0dcc623cbh
+ DD 068fcedb6h,068fcedb6h
+ DD 063f1e4b8h,063f1e4b8h
+ DD 0cadc31d7h,0cadc31d7h
+ DD 010856342h,010856342h
+ DD 040229713h,040229713h
+ DD 02011c684h,02011c684h
+ DD 07d244a85h,07d244a85h
+ DD 0f83dbbd2h,0f83dbbd2h
+ DD 01132f9aeh,01132f9aeh
+ DD 06da129c7h,06da129c7h
+ DD 04b2f9e1dh,04b2f9e1dh
+ DD 0f330b2dch,0f330b2dch
+ DD 0ec52860dh,0ec52860dh
+ DD 0d0e3c177h,0d0e3c177h
+ DD 06c16b32bh,06c16b32bh
+ DD 099b970a9h,099b970a9h
+ DD 0fa489411h,0fa489411h
+ DD 02264e947h,02264e947h
+ DD 0c48cfca8h,0c48cfca8h
+ DD 01a3ff0a0h,01a3ff0a0h
+ DD 0d82c7d56h,0d82c7d56h
+ DD 0ef903322h,0ef903322h
+ DD 0c74e4987h,0c74e4987h
+ DD 0c1d138d9h,0c1d138d9h
+ DD 0fea2ca8ch,0fea2ca8ch
+ DD 0360bd498h,0360bd498h
+ DD 0cf81f5a6h,0cf81f5a6h
+ DD 028de7aa5h,028de7aa5h
+ DD 0268eb7dah,0268eb7dah
+ DD 0a4bfad3fh,0a4bfad3fh
+ DD 0e49d3a2ch,0e49d3a2ch
+ DD 00d927850h,00d927850h
+ DD 09bcc5f6ah,09bcc5f6ah
+ DD 062467e54h,062467e54h
+ DD 0c2138df6h,0c2138df6h
+ DD 0e8b8d890h,0e8b8d890h
+ DD 05ef7392eh,05ef7392eh
+ DD 0f5afc382h,0f5afc382h
+ DD 0be805d9fh,0be805d9fh
+ DD 07c93d069h,07c93d069h
+ DD 0a92dd56fh,0a92dd56fh
+ DD 0b31225cfh,0b31225cfh
+ DD 03b99acc8h,03b99acc8h
+ DD 0a77d1810h,0a77d1810h
+ DD 06e639ce8h,06e639ce8h
+ DD 07bbb3bdbh,07bbb3bdbh
+ DD 0097826cdh,0097826cdh
+ DD 0f418596eh,0f418596eh
+ DD 001b79aech,001b79aech
+ DD 0a89a4f83h,0a89a4f83h
+ DD 0656e95e6h,0656e95e6h
+ DD 07ee6ffaah,07ee6ffaah
+ DD 008cfbc21h,008cfbc21h
+ DD 0e6e815efh,0e6e815efh
+ DD 0d99be7bah,0d99be7bah
+ DD 0ce366f4ah,0ce366f4ah
+ DD 0d4099feah,0d4099feah
+ DD 0d67cb029h,0d67cb029h
+ DD 0afb2a431h,0afb2a431h
+ DD 031233f2ah,031233f2ah
+ DD 03094a5c6h,03094a5c6h
+ DD 0c066a235h,0c066a235h
+ DD 037bc4e74h,037bc4e74h
+ DD 0a6ca82fch,0a6ca82fch
+ DD 0b0d090e0h,0b0d090e0h
+ DD 015d8a733h,015d8a733h
+ DD 04a9804f1h,04a9804f1h
+ DD 0f7daec41h,0f7daec41h
+ DD 00e50cd7fh,00e50cd7fh
+ DD 02ff69117h,02ff69117h
+ DD 08dd64d76h,08dd64d76h
+ DD 04db0ef43h,04db0ef43h
+ DD 0544daacch,0544daacch
+ DD 0df0496e4h,0df0496e4h
+ DD 0e3b5d19eh,0e3b5d19eh
+ DD 01b886a4ch,01b886a4ch
+ DD 0b81f2cc1h,0b81f2cc1h
+ DD 07f516546h,07f516546h
+ DD 004ea5e9dh,004ea5e9dh
+ DD 05d358c01h,05d358c01h
+ DD 0737487fah,0737487fah
+ DD 02e410bfbh,02e410bfbh
+ DD 05a1d67b3h,05a1d67b3h
+ DD 052d2db92h,052d2db92h
+ DD 0335610e9h,0335610e9h
+ DD 01347d66dh,01347d66dh
+ DD 08c61d79ah,08c61d79ah
+ DD 07a0ca137h,07a0ca137h
+ DD 08e14f859h,08e14f859h
+ DD 0893c13ebh,0893c13ebh
+ DD 0ee27a9ceh,0ee27a9ceh
+ DD 035c961b7h,035c961b7h
+ DD 0ede51ce1h,0ede51ce1h
+ DD 03cb1477ah,03cb1477ah
+ DD 059dfd29ch,059dfd29ch
+ DD 03f73f255h,03f73f255h
+ DD 079ce1418h,079ce1418h
+ DD 0bf37c773h,0bf37c773h
+ DD 0eacdf753h,0eacdf753h
+ DD 05baafd5fh,05baafd5fh
+ DD 0146f3ddfh,0146f3ddfh
+ DD 086db4478h,086db4478h
+ DD 081f3afcah,081f3afcah
+ DD 03ec468b9h,03ec468b9h
+ DD 02c342438h,02c342438h
+ DD 05f40a3c2h,05f40a3c2h
+ DD 072c31d16h,072c31d16h
+ DD 00c25e2bch,00c25e2bch
+ DD 08b493c28h,08b493c28h
+ DD 041950dffh,041950dffh
+ DD 07101a839h,07101a839h
+ DD 0deb30c08h,0deb30c08h
+ DD 09ce4b4d8h,09ce4b4d8h
+ DD 090c15664h,090c15664h
+ DD 06184cb7bh,06184cb7bh
+ DD 070b632d5h,070b632d5h
+ DD 0745c6c48h,0745c6c48h
+ DD 04257b8d0h,04257b8d0h
+DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h
+DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh
+DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h
+DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh
+DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh
+DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh
+DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h
+DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h
+DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h
+DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h
+DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah
+DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h
+DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah
+DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h
+DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h
+DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh
+DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah
+DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h
+DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h
+DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh
+DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h
+DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh
+DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h
+DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h
+DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h
+DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh
+DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh
+DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh
+DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h
+DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h
+DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h
+DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh
+ DD 080808080h,080808080h,0fefefefeh,0fefefefeh
+ DD 01b1b1b1bh,01b1b1b1bh,0,0
+DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h
+DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh
+DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h
+DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh
+DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh
+DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh
+DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h
+DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h
+DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h
+DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h
+DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah
+DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h
+DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah
+DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h
+DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h
+DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh
+DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah
+DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h
+DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h
+DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh
+DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h
+DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh
+DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h
+DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h
+DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h
+DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh
+DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh
+DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh
+DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h
+DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h
+DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h
+DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh
+ DD 080808080h,080808080h,0fefefefeh,0fefefefeh
+ DD 01b1b1b1bh,01b1b1b1bh,0,0
+DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h
+DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh
+DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h
+DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh
+DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh
+DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh
+DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h
+DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h
+DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h
+DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h
+DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah
+DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h
+DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah
+DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h
+DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h
+DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh
+DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah
+DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h
+DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h
+DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh
+DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h
+DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh
+DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h
+DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h
+DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h
+DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh
+DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh
+DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh
+DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h
+DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h
+DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h
+DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh
+ DD 080808080h,080808080h,0fefefefeh,0fefefefeh
+ DD 01b1b1b1bh,01b1b1b1bh,0,0
+DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h
+DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh
+DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h
+DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh
+DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh
+DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh
+DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h
+DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h
+DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h
+DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h
+DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah
+DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h
+DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah
+DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h
+DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h
+DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh
+DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah
+DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h
+DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h
+DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh
+DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h
+DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh
+DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h
+DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h
+DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h
+DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh
+DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh
+DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh
+DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h
+DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h
+DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h
+DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh
+ DD 080808080h,080808080h,0fefefefeh,0fefefefeh
+ DD 01b1b1b1bh,01b1b1b1bh,0,0
+DB 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32
+DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+DB 62,0
+ALIGN 64
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+block_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$in_block_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_block_prologue
+
+ mov rax,QWORD PTR[24+rax]
+ lea rax,QWORD PTR[48+rax]
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+$L$in_block_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ jmp $L$common_seh_exit
+block_se_handler ENDP
+
+
+ALIGN 16
+key_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$in_key_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_key_prologue
+
+ lea rax,QWORD PTR[56+rax]
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+$L$in_key_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ jmp $L$common_seh_exit
+key_se_handler ENDP
+
+
+ALIGN 16
+cbc_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ lea r10,QWORD PTR[$L$cbc_prologue]
+ cmp rbx,r10
+ jb $L$in_cbc_prologue
+
+ lea r10,QWORD PTR[$L$cbc_fast_body]
+ cmp rbx,r10
+ jb $L$in_cbc_frame_setup
+
+ lea r10,QWORD PTR[$L$cbc_slow_prologue]
+ cmp rbx,r10
+ jb $L$in_cbc_body
+
+ lea r10,QWORD PTR[$L$cbc_slow_body]
+ cmp rbx,r10
+ jb $L$in_cbc_frame_setup
+
+$L$in_cbc_body::
+ mov rax,QWORD PTR[152+r8]
+
+ lea r10,QWORD PTR[$L$cbc_epilogue]
+ cmp rbx,r10
+ jae $L$in_cbc_prologue
+
+ lea rax,QWORD PTR[8+rax]
+
+ lea r10,QWORD PTR[$L$cbc_popfq]
+ cmp rbx,r10
+ jae $L$in_cbc_prologue
+
+ mov rax,QWORD PTR[8+rax]
+ lea rax,QWORD PTR[56+rax]
+
+$L$in_cbc_frame_setup::
+ mov rbx,QWORD PTR[((-16))+rax]
+ mov rbp,QWORD PTR[((-24))+rax]
+ mov r12,QWORD PTR[((-32))+rax]
+ mov r13,QWORD PTR[((-40))+rax]
+ mov r14,QWORD PTR[((-48))+rax]
+ mov r15,QWORD PTR[((-56))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+$L$in_cbc_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+$L$common_seh_exit::
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+cbc_se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_asm_AES_encrypt
+ DD imagerel $L$SEH_end_asm_AES_encrypt
+ DD imagerel $L$SEH_info_asm_AES_encrypt
+
+ DD imagerel $L$SEH_begin_asm_AES_decrypt
+ DD imagerel $L$SEH_end_asm_AES_decrypt
+ DD imagerel $L$SEH_info_asm_AES_decrypt
+
+ DD imagerel $L$SEH_begin_asm_AES_set_encrypt_key
+ DD imagerel $L$SEH_end_asm_AES_set_encrypt_key
+ DD imagerel $L$SEH_info_asm_AES_set_encrypt_key
+
+ DD imagerel $L$SEH_begin_asm_AES_set_decrypt_key
+ DD imagerel $L$SEH_end_asm_AES_set_decrypt_key
+ DD imagerel $L$SEH_info_asm_AES_set_decrypt_key
+
+ DD imagerel $L$SEH_begin_asm_AES_cbc_encrypt
+ DD imagerel $L$SEH_end_asm_AES_cbc_encrypt
+ DD imagerel $L$SEH_info_asm_AES_cbc_encrypt
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_asm_AES_encrypt::
+DB 9,0,0,0
+ DD imagerel block_se_handler
+ DD imagerel $L$enc_prologue,imagerel $L$enc_epilogue
+$L$SEH_info_asm_AES_decrypt::
+DB 9,0,0,0
+ DD imagerel block_se_handler
+ DD imagerel $L$dec_prologue,imagerel $L$dec_epilogue
+$L$SEH_info_asm_AES_set_encrypt_key::
+DB 9,0,0,0
+ DD imagerel key_se_handler
+ DD imagerel $L$enc_key_prologue,imagerel $L$enc_key_epilogue
+$L$SEH_info_asm_AES_set_decrypt_key::
+DB 9,0,0,0
+ DD imagerel key_se_handler
+ DD imagerel $L$dec_key_prologue,imagerel $L$dec_key_epilogue
+$L$SEH_info_asm_AES_cbc_encrypt::
+DB 9,0,0,0
+ DD imagerel cbc_se_handler
+
+.xdata ENDS
+END
diff --git a/win-x86_64/crypto/aes/aesni-x86_64.asm b/win-x86_64/crypto/aes/aesni-x86_64.asm
new file mode 100644
index 0000000..53d8afc
--- /dev/null
+++ b/win-x86_64/crypto/aes/aesni-x86_64.asm
@@ -0,0 +1,3631 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+EXTERN OPENSSL_ia32cap_P:NEAR
+PUBLIC aesni_encrypt
+
+ALIGN 16
+aesni_encrypt PROC PUBLIC
+ movups xmm2,XMMWORD PTR[rcx]
+ mov eax,DWORD PTR[240+r8]
+ movups xmm0,XMMWORD PTR[r8]
+ movups xmm1,XMMWORD PTR[16+r8]
+ lea r8,QWORD PTR[32+r8]
+ xorps xmm2,xmm0
+$L$oop_enc1_1::
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD PTR[r8]
+ lea r8,QWORD PTR[16+r8]
+ jnz $L$oop_enc1_1
+DB 102,15,56,221,209
+ movups XMMWORD PTR[rdx],xmm2
+ DB 0F3h,0C3h ;repret
+aesni_encrypt ENDP
+
+PUBLIC aesni_decrypt
+
+ALIGN 16
+aesni_decrypt PROC PUBLIC
+ movups xmm2,XMMWORD PTR[rcx]
+ mov eax,DWORD PTR[240+r8]
+ movups xmm0,XMMWORD PTR[r8]
+ movups xmm1,XMMWORD PTR[16+r8]
+ lea r8,QWORD PTR[32+r8]
+ xorps xmm2,xmm0
+$L$oop_dec1_2::
+DB 102,15,56,222,209
+ dec eax
+ movups xmm1,XMMWORD PTR[r8]
+ lea r8,QWORD PTR[16+r8]
+ jnz $L$oop_dec1_2
+DB 102,15,56,223,209
+ movups XMMWORD PTR[rdx],xmm2
+ DB 0F3h,0C3h ;repret
+aesni_decrypt ENDP
+
+ALIGN 16
+_aesni_encrypt2 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD PTR[16+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ movups xmm0,XMMWORD PTR[32+rcx]
+ lea rcx,QWORD PTR[32+rax*1+rcx]
+ neg rax
+ add rax,16
+
+$L$enc_loop2::
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ movups xmm1,XMMWORD PTR[rax*1+rcx]
+ add rax,32
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+ movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx]
+ jnz $L$enc_loop2
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,221,208
+DB 102,15,56,221,216
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt2 ENDP
+
+ALIGN 16
+_aesni_decrypt2 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD PTR[16+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ movups xmm0,XMMWORD PTR[32+rcx]
+ lea rcx,QWORD PTR[32+rax*1+rcx]
+ neg rax
+ add rax,16
+
+$L$dec_loop2::
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+ movups xmm1,XMMWORD PTR[rax*1+rcx]
+ add rax,32
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+ movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx]
+ jnz $L$dec_loop2
+
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,223,208
+DB 102,15,56,223,216
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt2 ENDP
+
+ALIGN 16
+_aesni_encrypt3 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD PTR[16+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ movups xmm0,XMMWORD PTR[32+rcx]
+ lea rcx,QWORD PTR[32+rax*1+rcx]
+ neg rax
+ add rax,16
+
+$L$enc_loop3::
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+ movups xmm1,XMMWORD PTR[rax*1+rcx]
+ add rax,32
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+ movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx]
+ jnz $L$enc_loop3
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,221,208
+DB 102,15,56,221,216
+DB 102,15,56,221,224
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt3 ENDP
+
+ALIGN 16
+_aesni_decrypt3 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD PTR[16+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ movups xmm0,XMMWORD PTR[32+rcx]
+ lea rcx,QWORD PTR[32+rax*1+rcx]
+ neg rax
+ add rax,16
+
+$L$dec_loop3::
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+ movups xmm1,XMMWORD PTR[rax*1+rcx]
+ add rax,32
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+ movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx]
+ jnz $L$dec_loop3
+
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,223,208
+DB 102,15,56,223,216
+DB 102,15,56,223,224
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt3 ENDP
+
+ALIGN 16
+_aesni_encrypt4 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD PTR[16+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ xorps xmm5,xmm0
+ movups xmm0,XMMWORD PTR[32+rcx]
+ lea rcx,QWORD PTR[32+rax*1+rcx]
+ neg rax
+DB 00fh,01fh,000h
+ add rax,16
+
+$L$enc_loop4::
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ movups xmm1,XMMWORD PTR[rax*1+rcx]
+ add rax,32
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+ movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx]
+ jnz $L$enc_loop4
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,221,208
+DB 102,15,56,221,216
+DB 102,15,56,221,224
+DB 102,15,56,221,232
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt4 ENDP
+
+ALIGN 16
+_aesni_decrypt4 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD PTR[16+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ xorps xmm5,xmm0
+ movups xmm0,XMMWORD PTR[32+rcx]
+ lea rcx,QWORD PTR[32+rax*1+rcx]
+ neg rax
+DB 00fh,01fh,000h
+ add rax,16
+
+$L$dec_loop4::
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+ movups xmm1,XMMWORD PTR[rax*1+rcx]
+ add rax,32
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+ movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx]
+ jnz $L$dec_loop4
+
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,223,208
+DB 102,15,56,223,216
+DB 102,15,56,223,224
+DB 102,15,56,223,232
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt4 ENDP
+
+ALIGN 16
+_aesni_encrypt6 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD PTR[16+rcx]
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ pxor xmm4,xmm0
+DB 102,15,56,220,209
+ lea rcx,QWORD PTR[32+rax*1+rcx]
+ neg rax
+DB 102,15,56,220,217
+ pxor xmm5,xmm0
+ pxor xmm6,xmm0
+DB 102,15,56,220,225
+ pxor xmm7,xmm0
+ add rax,16
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+ movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx]
+ jmp $L$enc_loop6_enter
+ALIGN 16
+$L$enc_loop6::
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+$L$enc_loop6_enter::
+ movups xmm1,XMMWORD PTR[rax*1+rcx]
+ add rax,32
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+ movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx]
+ jnz $L$enc_loop6
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,15,56,221,208
+DB 102,15,56,221,216
+DB 102,15,56,221,224
+DB 102,15,56,221,232
+DB 102,15,56,221,240
+DB 102,15,56,221,248
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt6 ENDP
+
+ALIGN 16
+_aesni_decrypt6 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD PTR[16+rcx]
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ pxor xmm4,xmm0
+DB 102,15,56,222,209
+ lea rcx,QWORD PTR[32+rax*1+rcx]
+ neg rax
+DB 102,15,56,222,217
+ pxor xmm5,xmm0
+ pxor xmm6,xmm0
+DB 102,15,56,222,225
+ pxor xmm7,xmm0
+ add rax,16
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+ movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx]
+ jmp $L$dec_loop6_enter
+ALIGN 16
+$L$dec_loop6::
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+$L$dec_loop6_enter::
+ movups xmm1,XMMWORD PTR[rax*1+rcx]
+ add rax,32
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+ movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx]
+ jnz $L$dec_loop6
+
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,15,56,223,208
+DB 102,15,56,223,216
+DB 102,15,56,223,224
+DB 102,15,56,223,232
+DB 102,15,56,223,240
+DB 102,15,56,223,248
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt6 ENDP
+
+ALIGN 16
+_aesni_encrypt8 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD PTR[16+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ pxor xmm4,xmm0
+ pxor xmm5,xmm0
+ pxor xmm6,xmm0
+ lea rcx,QWORD PTR[32+rax*1+rcx]
+ neg rax
+DB 102,15,56,220,209
+ add rax,16
+ pxor xmm7,xmm0
+DB 102,15,56,220,217
+ pxor xmm8,xmm0
+ pxor xmm9,xmm0
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+ movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx]
+ jmp $L$enc_loop8_enter
+ALIGN 16
+$L$enc_loop8::
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+$L$enc_loop8_enter::
+ movups xmm1,XMMWORD PTR[rax*1+rcx]
+ add rax,32
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+DB 102,68,15,56,220,192
+DB 102,68,15,56,220,200
+ movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx]
+ jnz $L$enc_loop8
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+DB 102,15,56,221,208
+DB 102,15,56,221,216
+DB 102,15,56,221,224
+DB 102,15,56,221,232
+DB 102,15,56,221,240
+DB 102,15,56,221,248
+DB 102,68,15,56,221,192
+DB 102,68,15,56,221,200
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt8 ENDP
+
+ALIGN 16
+_aesni_decrypt8 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD PTR[16+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ pxor xmm4,xmm0
+ pxor xmm5,xmm0
+ pxor xmm6,xmm0
+ lea rcx,QWORD PTR[32+rax*1+rcx]
+ neg rax
+DB 102,15,56,222,209
+ add rax,16
+ pxor xmm7,xmm0
+DB 102,15,56,222,217
+ pxor xmm8,xmm0
+ pxor xmm9,xmm0
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+ movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx]
+ jmp $L$dec_loop8_enter
+ALIGN 16
+$L$dec_loop8::
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+$L$dec_loop8_enter::
+ movups xmm1,XMMWORD PTR[rax*1+rcx]
+ add rax,32
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+DB 102,68,15,56,222,192
+DB 102,68,15,56,222,200
+ movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx]
+ jnz $L$dec_loop8
+
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+DB 102,15,56,223,208
+DB 102,15,56,223,216
+DB 102,15,56,223,224
+DB 102,15,56,223,232
+DB 102,15,56,223,240
+DB 102,15,56,223,248
+DB 102,68,15,56,223,192
+DB 102,68,15,56,223,200
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt8 ENDP
+PUBLIC aesni_ecb_encrypt
+
+ALIGN 16
+aesni_ecb_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ecb_encrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+
+
+ lea rsp,QWORD PTR[((-88))+rsp]
+ movaps XMMWORD PTR[rsp],xmm6
+ movaps XMMWORD PTR[16+rsp],xmm7
+ movaps XMMWORD PTR[32+rsp],xmm8
+ movaps XMMWORD PTR[48+rsp],xmm9
+$L$ecb_enc_body::
+ and rdx,-16
+ jz $L$ecb_ret
+
+ mov eax,DWORD PTR[240+rcx]
+ movups xmm0,XMMWORD PTR[rcx]
+ mov r11,rcx
+ mov r10d,eax
+ test r8d,r8d
+ jz $L$ecb_decrypt
+
+ cmp rdx,080h
+ jb $L$ecb_enc_tail
+
+ movdqu xmm2,XMMWORD PTR[rdi]
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+ sub rdx,080h
+ jmp $L$ecb_enc_loop8_enter
+ALIGN 16
+$L$ecb_enc_loop8::
+ movups XMMWORD PTR[rsi],xmm2
+ mov rcx,r11
+ movdqu xmm2,XMMWORD PTR[rdi]
+ mov eax,r10d
+ movups XMMWORD PTR[16+rsi],xmm3
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movups XMMWORD PTR[32+rsi],xmm4
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movups XMMWORD PTR[48+rsi],xmm5
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movups XMMWORD PTR[64+rsi],xmm6
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movups XMMWORD PTR[80+rsi],xmm7
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movups XMMWORD PTR[96+rsi],xmm8
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+$L$ecb_enc_loop8_enter::
+
+ call _aesni_encrypt8
+
+ sub rdx,080h
+ jnc $L$ecb_enc_loop8
+
+ movups XMMWORD PTR[rsi],xmm2
+ mov rcx,r11
+ movups XMMWORD PTR[16+rsi],xmm3
+ mov eax,r10d
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ movups XMMWORD PTR[96+rsi],xmm8
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+ add rdx,080h
+ jz $L$ecb_ret
+
+$L$ecb_enc_tail::
+ movups xmm2,XMMWORD PTR[rdi]
+ cmp rdx,020h
+ jb $L$ecb_enc_one
+ movups xmm3,XMMWORD PTR[16+rdi]
+ je $L$ecb_enc_two
+ movups xmm4,XMMWORD PTR[32+rdi]
+ cmp rdx,040h
+ jb $L$ecb_enc_three
+ movups xmm5,XMMWORD PTR[48+rdi]
+ je $L$ecb_enc_four
+ movups xmm6,XMMWORD PTR[64+rdi]
+ cmp rdx,060h
+ jb $L$ecb_enc_five
+ movups xmm7,XMMWORD PTR[80+rdi]
+ je $L$ecb_enc_six
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ call _aesni_encrypt8
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ movups XMMWORD PTR[96+rsi],xmm8
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_one::
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_3::
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_3
+DB 102,15,56,221,209
+ movups XMMWORD PTR[rsi],xmm2
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_two::
+ call _aesni_encrypt2
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_three::
+ call _aesni_encrypt3
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_four::
+ call _aesni_encrypt4
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_five::
+ xorps xmm7,xmm7
+ call _aesni_encrypt6
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_six::
+ call _aesni_encrypt6
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ jmp $L$ecb_ret
+
+ALIGN 16
+$L$ecb_decrypt::
+ cmp rdx,080h
+ jb $L$ecb_dec_tail
+
+ movdqu xmm2,XMMWORD PTR[rdi]
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+ sub rdx,080h
+ jmp $L$ecb_dec_loop8_enter
+ALIGN 16
+$L$ecb_dec_loop8::
+ movups XMMWORD PTR[rsi],xmm2
+ mov rcx,r11
+ movdqu xmm2,XMMWORD PTR[rdi]
+ mov eax,r10d
+ movups XMMWORD PTR[16+rsi],xmm3
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movups XMMWORD PTR[32+rsi],xmm4
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movups XMMWORD PTR[48+rsi],xmm5
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movups XMMWORD PTR[64+rsi],xmm6
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movups XMMWORD PTR[80+rsi],xmm7
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movups XMMWORD PTR[96+rsi],xmm8
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+$L$ecb_dec_loop8_enter::
+
+ call _aesni_decrypt8
+
+ movups xmm0,XMMWORD PTR[r11]
+ sub rdx,080h
+ jnc $L$ecb_dec_loop8
+
+ movups XMMWORD PTR[rsi],xmm2
+ mov rcx,r11
+ movups XMMWORD PTR[16+rsi],xmm3
+ mov eax,r10d
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ movups XMMWORD PTR[96+rsi],xmm8
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+ add rdx,080h
+ jz $L$ecb_ret
+
+$L$ecb_dec_tail::
+ movups xmm2,XMMWORD PTR[rdi]
+ cmp rdx,020h
+ jb $L$ecb_dec_one
+ movups xmm3,XMMWORD PTR[16+rdi]
+ je $L$ecb_dec_two
+ movups xmm4,XMMWORD PTR[32+rdi]
+ cmp rdx,040h
+ jb $L$ecb_dec_three
+ movups xmm5,XMMWORD PTR[48+rdi]
+ je $L$ecb_dec_four
+ movups xmm6,XMMWORD PTR[64+rdi]
+ cmp rdx,060h
+ jb $L$ecb_dec_five
+ movups xmm7,XMMWORD PTR[80+rdi]
+ je $L$ecb_dec_six
+ movups xmm8,XMMWORD PTR[96+rdi]
+ movups xmm0,XMMWORD PTR[rcx]
+ call _aesni_decrypt8
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ movups XMMWORD PTR[96+rsi],xmm8
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_one::
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_4::
+DB 102,15,56,222,209
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_4
+DB 102,15,56,223,209
+ movups XMMWORD PTR[rsi],xmm2
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_two::
+ call _aesni_decrypt2
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_three::
+ call _aesni_decrypt3
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_four::
+ call _aesni_decrypt4
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_five::
+ xorps xmm7,xmm7
+ call _aesni_decrypt6
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_six::
+ call _aesni_decrypt6
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+
+$L$ecb_ret::
+ movaps xmm6,XMMWORD PTR[rsp]
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ movaps xmm8,XMMWORD PTR[32+rsp]
+ movaps xmm9,XMMWORD PTR[48+rsp]
+ lea rsp,QWORD PTR[88+rsp]
+$L$ecb_enc_ret::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ecb_encrypt::
+aesni_ecb_encrypt ENDP
+PUBLIC aesni_ccm64_encrypt_blocks
+
+ALIGN 16
+aesni_ccm64_encrypt_blocks PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ccm64_encrypt_blocks::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ lea rsp,QWORD PTR[((-88))+rsp]
+ movaps XMMWORD PTR[rsp],xmm6
+ movaps XMMWORD PTR[16+rsp],xmm7
+ movaps XMMWORD PTR[32+rsp],xmm8
+ movaps XMMWORD PTR[48+rsp],xmm9
+$L$ccm64_enc_body::
+ mov eax,DWORD PTR[240+rcx]
+ movdqu xmm6,XMMWORD PTR[r8]
+ movdqa xmm9,XMMWORD PTR[$L$increment64]
+ movdqa xmm7,XMMWORD PTR[$L$bswap_mask]
+
+ shl eax,4
+ mov r10d,16
+ lea r11,QWORD PTR[rcx]
+ movdqu xmm3,XMMWORD PTR[r9]
+ movdqa xmm2,xmm6
+ lea rcx,QWORD PTR[32+rax*1+rcx]
+DB 102,15,56,0,247
+ sub r10,rax
+ jmp $L$ccm64_enc_outer
+ALIGN 16
+$L$ccm64_enc_outer::
+ movups xmm0,XMMWORD PTR[r11]
+ mov rax,r10
+ movups xmm8,XMMWORD PTR[rdi]
+
+ xorps xmm2,xmm0
+ movups xmm1,XMMWORD PTR[16+r11]
+ xorps xmm0,xmm8
+ xorps xmm3,xmm0
+ movups xmm0,XMMWORD PTR[32+r11]
+
+$L$ccm64_enc2_loop::
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ movups xmm1,XMMWORD PTR[rax*1+rcx]
+ add rax,32
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+ movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx]
+ jnz $L$ccm64_enc2_loop
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ paddq xmm6,xmm9
+ dec rdx
+DB 102,15,56,221,208
+DB 102,15,56,221,216
+
+ lea rdi,QWORD PTR[16+rdi]
+ xorps xmm8,xmm2
+ movdqa xmm2,xmm6
+ movups XMMWORD PTR[rsi],xmm8
+DB 102,15,56,0,215
+ lea rsi,QWORD PTR[16+rsi]
+ jnz $L$ccm64_enc_outer
+
+ movups XMMWORD PTR[r9],xmm3
+ movaps xmm6,XMMWORD PTR[rsp]
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ movaps xmm8,XMMWORD PTR[32+rsp]
+ movaps xmm9,XMMWORD PTR[48+rsp]
+ lea rsp,QWORD PTR[88+rsp]
+$L$ccm64_enc_ret::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ccm64_encrypt_blocks::
+aesni_ccm64_encrypt_blocks ENDP
+PUBLIC aesni_ccm64_decrypt_blocks
+
+ALIGN 16
+aesni_ccm64_decrypt_blocks PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ccm64_decrypt_blocks::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ lea rsp,QWORD PTR[((-88))+rsp]
+ movaps XMMWORD PTR[rsp],xmm6
+ movaps XMMWORD PTR[16+rsp],xmm7
+ movaps XMMWORD PTR[32+rsp],xmm8
+ movaps XMMWORD PTR[48+rsp],xmm9
+$L$ccm64_dec_body::
+ mov eax,DWORD PTR[240+rcx]
+ movups xmm6,XMMWORD PTR[r8]
+ movdqu xmm3,XMMWORD PTR[r9]
+ movdqa xmm9,XMMWORD PTR[$L$increment64]
+ movdqa xmm7,XMMWORD PTR[$L$bswap_mask]
+
+ movaps xmm2,xmm6
+ mov r10d,eax
+ mov r11,rcx
+DB 102,15,56,0,247
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_5::
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_5
+DB 102,15,56,221,209
+ shl r10d,4
+ mov eax,16
+ movups xmm8,XMMWORD PTR[rdi]
+ paddq xmm6,xmm9
+ lea rdi,QWORD PTR[16+rdi]
+ sub rax,r10
+ lea rcx,QWORD PTR[32+r10*1+r11]
+ mov r10,rax
+ jmp $L$ccm64_dec_outer
+ALIGN 16
+$L$ccm64_dec_outer::
+ xorps xmm8,xmm2
+ movdqa xmm2,xmm6
+ movups XMMWORD PTR[rsi],xmm8
+ lea rsi,QWORD PTR[16+rsi]
+DB 102,15,56,0,215
+
+ sub rdx,1
+ jz $L$ccm64_dec_break
+
+ movups xmm0,XMMWORD PTR[r11]
+ mov rax,r10
+ movups xmm1,XMMWORD PTR[16+r11]
+ xorps xmm8,xmm0
+ xorps xmm2,xmm0
+ xorps xmm3,xmm8
+ movups xmm0,XMMWORD PTR[32+r11]
+ jmp $L$ccm64_dec2_loop
+ALIGN 16
+$L$ccm64_dec2_loop::
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ movups xmm1,XMMWORD PTR[rax*1+rcx]
+ add rax,32
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+ movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx]
+ jnz $L$ccm64_dec2_loop
+ movups xmm8,XMMWORD PTR[rdi]
+ paddq xmm6,xmm9
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,221,208
+DB 102,15,56,221,216
+ lea rdi,QWORD PTR[16+rdi]
+ jmp $L$ccm64_dec_outer
+
+ALIGN 16
+$L$ccm64_dec_break::
+
+ mov eax,DWORD PTR[240+r11]
+ movups xmm0,XMMWORD PTR[r11]
+ movups xmm1,XMMWORD PTR[16+r11]
+ xorps xmm8,xmm0
+ lea r11,QWORD PTR[32+r11]
+ xorps xmm3,xmm8
+$L$oop_enc1_6::
+DB 102,15,56,220,217
+ dec eax
+ movups xmm1,XMMWORD PTR[r11]
+ lea r11,QWORD PTR[16+r11]
+ jnz $L$oop_enc1_6
+DB 102,15,56,221,217
+ movups XMMWORD PTR[r9],xmm3
+ movaps xmm6,XMMWORD PTR[rsp]
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ movaps xmm8,XMMWORD PTR[32+rsp]
+ movaps xmm9,XMMWORD PTR[48+rsp]
+ lea rsp,QWORD PTR[88+rsp]
+$L$ccm64_dec_ret::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ccm64_decrypt_blocks::
+aesni_ccm64_decrypt_blocks ENDP
+PUBLIC aesni_ctr32_encrypt_blocks
+
+ALIGN 16
+aesni_ctr32_encrypt_blocks PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ctr32_encrypt_blocks::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+
+
+ lea rax,QWORD PTR[rsp]
+ push rbp
+ sub rsp,288
+ and rsp,-16
+ movaps XMMWORD PTR[(-168)+rax],xmm6
+ movaps XMMWORD PTR[(-152)+rax],xmm7
+ movaps XMMWORD PTR[(-136)+rax],xmm8
+ movaps XMMWORD PTR[(-120)+rax],xmm9
+ movaps XMMWORD PTR[(-104)+rax],xmm10
+ movaps XMMWORD PTR[(-88)+rax],xmm11
+ movaps XMMWORD PTR[(-72)+rax],xmm12
+ movaps XMMWORD PTR[(-56)+rax],xmm13
+ movaps XMMWORD PTR[(-40)+rax],xmm14
+ movaps XMMWORD PTR[(-24)+rax],xmm15
+$L$ctr32_body::
+ lea rbp,QWORD PTR[((-8))+rax]
+
+ cmp rdx,1
+ je $L$ctr32_one_shortcut
+
+ movdqu xmm2,XMMWORD PTR[r8]
+ movdqu xmm0,XMMWORD PTR[rcx]
+ mov r8d,DWORD PTR[12+r8]
+ pxor xmm2,xmm0
+ mov r11d,DWORD PTR[12+rcx]
+ movdqa XMMWORD PTR[rsp],xmm2
+ bswap r8d
+ movdqa xmm3,xmm2
+ movdqa xmm4,xmm2
+ movdqa xmm5,xmm2
+ movdqa XMMWORD PTR[64+rsp],xmm2
+ movdqa XMMWORD PTR[80+rsp],xmm2
+ movdqa XMMWORD PTR[96+rsp],xmm2
+ mov r10,rdx
+ movdqa XMMWORD PTR[112+rsp],xmm2
+
+ lea rax,QWORD PTR[1+r8]
+ lea rdx,QWORD PTR[2+r8]
+ bswap eax
+ bswap edx
+ xor eax,r11d
+ xor edx,r11d
+DB 102,15,58,34,216,3
+ lea rax,QWORD PTR[3+r8]
+ movdqa XMMWORD PTR[16+rsp],xmm3
+DB 102,15,58,34,226,3
+ bswap eax
+ mov rdx,r10
+ lea r10,QWORD PTR[4+r8]
+ movdqa XMMWORD PTR[32+rsp],xmm4
+ xor eax,r11d
+ bswap r10d
+DB 102,15,58,34,232,3
+ xor r10d,r11d
+ movdqa XMMWORD PTR[48+rsp],xmm5
+ lea r9,QWORD PTR[5+r8]
+ mov DWORD PTR[((64+12))+rsp],r10d
+ bswap r9d
+ lea r10,QWORD PTR[6+r8]
+ mov eax,DWORD PTR[240+rcx]
+ xor r9d,r11d
+ bswap r10d
+ mov DWORD PTR[((80+12))+rsp],r9d
+ xor r10d,r11d
+ lea r9,QWORD PTR[7+r8]
+ mov DWORD PTR[((96+12))+rsp],r10d
+ bswap r9d
+ mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+4))]
+ xor r9d,r11d
+ and r10d,71303168
+ mov DWORD PTR[((112+12))+rsp],r9d
+
+ movups xmm1,XMMWORD PTR[16+rcx]
+
+ movdqa xmm6,XMMWORD PTR[64+rsp]
+ movdqa xmm7,XMMWORD PTR[80+rsp]
+
+ cmp rdx,8
+ jb $L$ctr32_tail
+
+ sub rdx,6
+ cmp r10d,4194304
+ je $L$ctr32_6x
+
+ lea rcx,QWORD PTR[128+rcx]
+ sub rdx,2
+ jmp $L$ctr32_loop8
+
+ALIGN 16
+$L$ctr32_6x::
+ shl eax,4
+ mov r10d,48
+ bswap r11d
+ lea rcx,QWORD PTR[32+rax*1+rcx]
+ sub r10,rax
+ jmp $L$ctr32_loop6
+
+ALIGN 16
+$L$ctr32_loop6::
+ add r8d,6
+ movups xmm0,XMMWORD PTR[((-48))+r10*1+rcx]
+DB 102,15,56,220,209
+ mov eax,r8d
+ xor eax,r11d
+DB 102,15,56,220,217
+DB 00fh,038h,0f1h,044h,024h,12
+ lea eax,DWORD PTR[1+r8]
+DB 102,15,56,220,225
+ xor eax,r11d
+DB 00fh,038h,0f1h,044h,024h,28
+DB 102,15,56,220,233
+ lea eax,DWORD PTR[2+r8]
+ xor eax,r11d
+DB 102,15,56,220,241
+DB 00fh,038h,0f1h,044h,024h,44
+ lea eax,DWORD PTR[3+r8]
+DB 102,15,56,220,249
+ movups xmm1,XMMWORD PTR[((-32))+r10*1+rcx]
+ xor eax,r11d
+
+DB 102,15,56,220,208
+DB 00fh,038h,0f1h,044h,024h,60
+ lea eax,DWORD PTR[4+r8]
+DB 102,15,56,220,216
+ xor eax,r11d
+DB 00fh,038h,0f1h,044h,024h,76
+DB 102,15,56,220,224
+ lea eax,DWORD PTR[5+r8]
+ xor eax,r11d
+DB 102,15,56,220,232
+DB 00fh,038h,0f1h,044h,024h,92
+ mov rax,r10
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+ movups xmm0,XMMWORD PTR[((-16))+r10*1+rcx]
+
+ call $L$enc_loop6
+
+ movdqu xmm8,XMMWORD PTR[rdi]
+ movdqu xmm9,XMMWORD PTR[16+rdi]
+ movdqu xmm10,XMMWORD PTR[32+rdi]
+ movdqu xmm11,XMMWORD PTR[48+rdi]
+ movdqu xmm12,XMMWORD PTR[64+rdi]
+ movdqu xmm13,XMMWORD PTR[80+rdi]
+ lea rdi,QWORD PTR[96+rdi]
+ movups xmm1,XMMWORD PTR[((-64))+r10*1+rcx]
+ pxor xmm8,xmm2
+ movaps xmm2,XMMWORD PTR[rsp]
+ pxor xmm9,xmm3
+ movaps xmm3,XMMWORD PTR[16+rsp]
+ pxor xmm10,xmm4
+ movaps xmm4,XMMWORD PTR[32+rsp]
+ pxor xmm11,xmm5
+ movaps xmm5,XMMWORD PTR[48+rsp]
+ pxor xmm12,xmm6
+ movaps xmm6,XMMWORD PTR[64+rsp]
+ pxor xmm13,xmm7
+ movaps xmm7,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[rsi],xmm8
+ movdqu XMMWORD PTR[16+rsi],xmm9
+ movdqu XMMWORD PTR[32+rsi],xmm10
+ movdqu XMMWORD PTR[48+rsi],xmm11
+ movdqu XMMWORD PTR[64+rsi],xmm12
+ movdqu XMMWORD PTR[80+rsi],xmm13
+ lea rsi,QWORD PTR[96+rsi]
+
+ sub rdx,6
+ jnc $L$ctr32_loop6
+
+ add rdx,6
+ jz $L$ctr32_done
+
+ lea eax,DWORD PTR[((-48))+r10]
+ lea rcx,QWORD PTR[((-80))+r10*1+rcx]
+ neg eax
+ shr eax,4
+ jmp $L$ctr32_tail
+
+ALIGN 32
+$L$ctr32_loop8::
+ add r8d,8
+ movdqa xmm8,XMMWORD PTR[96+rsp]
+DB 102,15,56,220,209
+ mov r9d,r8d
+ movdqa xmm9,XMMWORD PTR[112+rsp]
+DB 102,15,56,220,217
+ bswap r9d
+ movups xmm0,XMMWORD PTR[((32-128))+rcx]
+DB 102,15,56,220,225
+ xor r9d,r11d
+ nop
+DB 102,15,56,220,233
+ mov DWORD PTR[((0+12))+rsp],r9d
+ lea r9,QWORD PTR[1+r8]
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+ movups xmm1,XMMWORD PTR[((48-128))+rcx]
+ bswap r9d
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+ xor r9d,r11d
+DB 066h,090h
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+ mov DWORD PTR[((16+12))+rsp],r9d
+ lea r9,QWORD PTR[2+r8]
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+DB 102,68,15,56,220,192
+DB 102,68,15,56,220,200
+ movups xmm0,XMMWORD PTR[((64-128))+rcx]
+ bswap r9d
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ xor r9d,r11d
+DB 066h,090h
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ mov DWORD PTR[((32+12))+rsp],r9d
+ lea r9,QWORD PTR[3+r8]
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+ movups xmm1,XMMWORD PTR[((80-128))+rcx]
+ bswap r9d
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+ xor r9d,r11d
+DB 066h,090h
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+ mov DWORD PTR[((48+12))+rsp],r9d
+ lea r9,QWORD PTR[4+r8]
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+DB 102,68,15,56,220,192
+DB 102,68,15,56,220,200
+ movups xmm0,XMMWORD PTR[((96-128))+rcx]
+ bswap r9d
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ xor r9d,r11d
+DB 066h,090h
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ mov DWORD PTR[((64+12))+rsp],r9d
+ lea r9,QWORD PTR[5+r8]
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+ movups xmm1,XMMWORD PTR[((112-128))+rcx]
+ bswap r9d
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+ xor r9d,r11d
+DB 066h,090h
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+ mov DWORD PTR[((80+12))+rsp],r9d
+ lea r9,QWORD PTR[6+r8]
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+DB 102,68,15,56,220,192
+DB 102,68,15,56,220,200
+ movups xmm0,XMMWORD PTR[((128-128))+rcx]
+ bswap r9d
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ xor r9d,r11d
+DB 066h,090h
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ mov DWORD PTR[((96+12))+rsp],r9d
+ lea r9,QWORD PTR[7+r8]
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+ movups xmm1,XMMWORD PTR[((144-128))+rcx]
+ bswap r9d
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+ xor r9d,r11d
+ movdqu xmm10,XMMWORD PTR[rdi]
+DB 102,15,56,220,232
+ mov DWORD PTR[((112+12))+rsp],r9d
+ cmp eax,11
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+DB 102,68,15,56,220,192
+DB 102,68,15,56,220,200
+ movups xmm0,XMMWORD PTR[((160-128))+rcx]
+
+ jb $L$ctr32_enc_done
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+ movups xmm1,XMMWORD PTR[((176-128))+rcx]
+
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+DB 102,68,15,56,220,192
+DB 102,68,15,56,220,200
+ movups xmm0,XMMWORD PTR[((192-128))+rcx]
+ je $L$ctr32_enc_done
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+ movups xmm1,XMMWORD PTR[((208-128))+rcx]
+
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+DB 102,68,15,56,220,192
+DB 102,68,15,56,220,200
+ movups xmm0,XMMWORD PTR[((224-128))+rcx]
+ jmp $L$ctr32_enc_done
+
+ALIGN 16
+$L$ctr32_enc_done::
+ movdqu xmm11,XMMWORD PTR[16+rdi]
+ pxor xmm10,xmm0
+ movdqu xmm12,XMMWORD PTR[32+rdi]
+ pxor xmm11,xmm0
+ movdqu xmm13,XMMWORD PTR[48+rdi]
+ pxor xmm12,xmm0
+ movdqu xmm14,XMMWORD PTR[64+rdi]
+ pxor xmm13,xmm0
+ movdqu xmm15,XMMWORD PTR[80+rdi]
+ pxor xmm14,xmm0
+ pxor xmm15,xmm0
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+ movdqu xmm1,XMMWORD PTR[96+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+
+DB 102,65,15,56,221,210
+ pxor xmm1,xmm0
+ movdqu xmm10,XMMWORD PTR[((112-128))+rdi]
+DB 102,65,15,56,221,219
+ pxor xmm10,xmm0
+ movdqa xmm11,XMMWORD PTR[rsp]
+DB 102,65,15,56,221,228
+DB 102,65,15,56,221,237
+ movdqa xmm12,XMMWORD PTR[16+rsp]
+ movdqa xmm13,XMMWORD PTR[32+rsp]
+DB 102,65,15,56,221,246
+DB 102,65,15,56,221,255
+ movdqa xmm14,XMMWORD PTR[48+rsp]
+ movdqa xmm15,XMMWORD PTR[64+rsp]
+DB 102,68,15,56,221,193
+ movdqa xmm0,XMMWORD PTR[80+rsp]
+ movups xmm1,XMMWORD PTR[((16-128))+rcx]
+DB 102,69,15,56,221,202
+
+ movups XMMWORD PTR[rsi],xmm2
+ movdqa xmm2,xmm11
+ movups XMMWORD PTR[16+rsi],xmm3
+ movdqa xmm3,xmm12
+ movups XMMWORD PTR[32+rsi],xmm4
+ movdqa xmm4,xmm13
+ movups XMMWORD PTR[48+rsi],xmm5
+ movdqa xmm5,xmm14
+ movups XMMWORD PTR[64+rsi],xmm6
+ movdqa xmm6,xmm15
+ movups XMMWORD PTR[80+rsi],xmm7
+ movdqa xmm7,xmm0
+ movups XMMWORD PTR[96+rsi],xmm8
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+
+ sub rdx,8
+ jnc $L$ctr32_loop8
+
+ add rdx,8
+ jz $L$ctr32_done
+ lea rcx,QWORD PTR[((-128))+rcx]
+
+$L$ctr32_tail::
+ lea rcx,QWORD PTR[16+rcx]
+ cmp rdx,4
+ jb $L$ctr32_loop3
+ je $L$ctr32_loop4
+
+ shl eax,4
+ movdqa xmm8,XMMWORD PTR[96+rsp]
+ pxor xmm9,xmm9
+
+ movups xmm0,XMMWORD PTR[16+rcx]
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ lea rcx,QWORD PTR[((32-16))+rax*1+rcx]
+ neg rax
+DB 102,15,56,220,225
+ add rax,16
+ movups xmm10,XMMWORD PTR[rdi]
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+ movups xmm11,XMMWORD PTR[16+rdi]
+ movups xmm12,XMMWORD PTR[32+rdi]
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+
+ call $L$enc_loop8_enter
+
+ movdqu xmm13,XMMWORD PTR[48+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm10,XMMWORD PTR[64+rdi]
+ pxor xmm3,xmm11
+ movdqu XMMWORD PTR[rsi],xmm2
+ pxor xmm4,xmm12
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ pxor xmm5,xmm13
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ pxor xmm6,xmm10
+ movdqu XMMWORD PTR[48+rsi],xmm5
+ movdqu XMMWORD PTR[64+rsi],xmm6
+ cmp rdx,6
+ jb $L$ctr32_done
+
+ movups xmm11,XMMWORD PTR[80+rdi]
+ xorps xmm7,xmm11
+ movups XMMWORD PTR[80+rsi],xmm7
+ je $L$ctr32_done
+
+ movups xmm12,XMMWORD PTR[96+rdi]
+ xorps xmm8,xmm12
+ movups XMMWORD PTR[96+rsi],xmm8
+ jmp $L$ctr32_done
+
+ALIGN 32
+$L$ctr32_loop4::
+DB 102,15,56,220,209
+ lea rcx,QWORD PTR[16+rcx]
+ dec eax
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ movups xmm1,XMMWORD PTR[rcx]
+ jnz $L$ctr32_loop4
+DB 102,15,56,221,209
+DB 102,15,56,221,217
+ movups xmm10,XMMWORD PTR[rdi]
+ movups xmm11,XMMWORD PTR[16+rdi]
+DB 102,15,56,221,225
+DB 102,15,56,221,233
+ movups xmm12,XMMWORD PTR[32+rdi]
+ movups xmm13,XMMWORD PTR[48+rdi]
+
+ xorps xmm2,xmm10
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm3,xmm11
+ movups XMMWORD PTR[16+rsi],xmm3
+ pxor xmm4,xmm12
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ pxor xmm5,xmm13
+ movdqu XMMWORD PTR[48+rsi],xmm5
+ jmp $L$ctr32_done
+
+ALIGN 32
+$L$ctr32_loop3::
+DB 102,15,56,220,209
+ lea rcx,QWORD PTR[16+rcx]
+ dec eax
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+ movups xmm1,XMMWORD PTR[rcx]
+ jnz $L$ctr32_loop3
+DB 102,15,56,221,209
+DB 102,15,56,221,217
+DB 102,15,56,221,225
+
+ movups xmm10,XMMWORD PTR[rdi]
+ xorps xmm2,xmm10
+ movups XMMWORD PTR[rsi],xmm2
+ cmp rdx,2
+ jb $L$ctr32_done
+
+ movups xmm11,XMMWORD PTR[16+rdi]
+ xorps xmm3,xmm11
+ movups XMMWORD PTR[16+rsi],xmm3
+ je $L$ctr32_done
+
+ movups xmm12,XMMWORD PTR[32+rdi]
+ xorps xmm4,xmm12
+ movups XMMWORD PTR[32+rsi],xmm4
+ jmp $L$ctr32_done
+
+ALIGN 16
+$L$ctr32_one_shortcut::
+ movups xmm2,XMMWORD PTR[r8]
+ movups xmm10,XMMWORD PTR[rdi]
+ mov eax,DWORD PTR[240+rcx]
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_7::
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_7
+DB 102,15,56,221,209
+ xorps xmm2,xmm10
+ movups XMMWORD PTR[rsi],xmm2
+ jmp $L$ctr32_done
+
+ALIGN 16
+$L$ctr32_done::
+ movaps xmm6,XMMWORD PTR[((-160))+rbp]
+ movaps xmm7,XMMWORD PTR[((-144))+rbp]
+ movaps xmm8,XMMWORD PTR[((-128))+rbp]
+ movaps xmm9,XMMWORD PTR[((-112))+rbp]
+ movaps xmm10,XMMWORD PTR[((-96))+rbp]
+ movaps xmm11,XMMWORD PTR[((-80))+rbp]
+ movaps xmm12,XMMWORD PTR[((-64))+rbp]
+ movaps xmm13,XMMWORD PTR[((-48))+rbp]
+ movaps xmm14,XMMWORD PTR[((-32))+rbp]
+ movaps xmm15,XMMWORD PTR[((-16))+rbp]
+ lea rsp,QWORD PTR[rbp]
+ pop rbp
+$L$ctr32_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ctr32_encrypt_blocks::
+aesni_ctr32_encrypt_blocks ENDP
+PUBLIC aesni_xts_encrypt
+
+ALIGN 16
+aesni_xts_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_xts_encrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ lea rax,QWORD PTR[rsp]
+ push rbp
+ sub rsp,272
+ and rsp,-16
+ movaps XMMWORD PTR[(-168)+rax],xmm6
+ movaps XMMWORD PTR[(-152)+rax],xmm7
+ movaps XMMWORD PTR[(-136)+rax],xmm8
+ movaps XMMWORD PTR[(-120)+rax],xmm9
+ movaps XMMWORD PTR[(-104)+rax],xmm10
+ movaps XMMWORD PTR[(-88)+rax],xmm11
+ movaps XMMWORD PTR[(-72)+rax],xmm12
+ movaps XMMWORD PTR[(-56)+rax],xmm13
+ movaps XMMWORD PTR[(-40)+rax],xmm14
+ movaps XMMWORD PTR[(-24)+rax],xmm15
+$L$xts_enc_body::
+ lea rbp,QWORD PTR[((-8))+rax]
+ movups xmm2,XMMWORD PTR[r9]
+ mov eax,DWORD PTR[240+r8]
+ mov r10d,DWORD PTR[240+rcx]
+ movups xmm0,XMMWORD PTR[r8]
+ movups xmm1,XMMWORD PTR[16+r8]
+ lea r8,QWORD PTR[32+r8]
+ xorps xmm2,xmm0
+$L$oop_enc1_8::
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD PTR[r8]
+ lea r8,QWORD PTR[16+r8]
+ jnz $L$oop_enc1_8
+DB 102,15,56,221,209
+ movups xmm0,XMMWORD PTR[rcx]
+ mov r11,rcx
+ mov eax,r10d
+ shl r10d,4
+ mov r9,rdx
+ and rdx,-16
+
+ movups xmm1,XMMWORD PTR[16+r10*1+rcx]
+
+ movdqa xmm8,XMMWORD PTR[$L$xts_magic]
+ movdqa xmm15,xmm2
+ pshufd xmm9,xmm2,05fh
+ pxor xmm1,xmm0
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+ movdqa xmm10,xmm15
+ psrad xmm14,31
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+ pxor xmm10,xmm0
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+ movdqa xmm11,xmm15
+ psrad xmm14,31
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+ pxor xmm11,xmm0
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+ movdqa xmm12,xmm15
+ psrad xmm14,31
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+ pxor xmm12,xmm0
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+ movdqa xmm13,xmm15
+ psrad xmm14,31
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+ pxor xmm13,xmm0
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm15
+ psrad xmm9,31
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pxor xmm14,xmm0
+ pxor xmm15,xmm9
+ movaps XMMWORD PTR[96+rsp],xmm1
+
+ sub rdx,16*6
+ jc $L$xts_enc_short
+
+ mov eax,16+96
+ lea rcx,QWORD PTR[32+r10*1+r11]
+ sub rax,r10
+ movups xmm1,XMMWORD PTR[16+r11]
+ mov r10,rax
+ lea r8,QWORD PTR[$L$xts_magic]
+ jmp $L$xts_enc_grandloop
+
+ALIGN 32
+$L$xts_enc_grandloop::
+ movdqu xmm2,XMMWORD PTR[rdi]
+ movdqa xmm8,xmm0
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ pxor xmm3,xmm11
+DB 102,15,56,220,209
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ pxor xmm4,xmm12
+DB 102,15,56,220,217
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ pxor xmm5,xmm13
+DB 102,15,56,220,225
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ pxor xmm8,xmm15
+ movdqa xmm9,XMMWORD PTR[96+rsp]
+ pxor xmm6,xmm14
+DB 102,15,56,220,233
+ movups xmm0,XMMWORD PTR[32+r11]
+ lea rdi,QWORD PTR[96+rdi]
+ pxor xmm7,xmm8
+
+ pxor xmm10,xmm9
+DB 102,15,56,220,241
+ pxor xmm11,xmm9
+ movdqa XMMWORD PTR[rsp],xmm10
+DB 102,15,56,220,249
+ movups xmm1,XMMWORD PTR[48+r11]
+ pxor xmm12,xmm9
+
+DB 102,15,56,220,208
+ pxor xmm13,xmm9
+ movdqa XMMWORD PTR[16+rsp],xmm11
+DB 102,15,56,220,216
+ pxor xmm14,xmm9
+ movdqa XMMWORD PTR[32+rsp],xmm12
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+ pxor xmm8,xmm9
+ movdqa XMMWORD PTR[64+rsp],xmm14
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+ movups xmm0,XMMWORD PTR[64+r11]
+ movdqa XMMWORD PTR[80+rsp],xmm8
+ pshufd xmm9,xmm15,05fh
+ jmp $L$xts_enc_loop6
+ALIGN 32
+$L$xts_enc_loop6::
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+ movups xmm1,XMMWORD PTR[((-64))+rax*1+rcx]
+ add rax,32
+
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+ movups xmm0,XMMWORD PTR[((-80))+rax*1+rcx]
+ jnz $L$xts_enc_loop6
+
+ movdqa xmm8,XMMWORD PTR[r8]
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+DB 102,15,56,220,209
+ paddq xmm15,xmm15
+ psrad xmm14,31
+DB 102,15,56,220,217
+ pand xmm14,xmm8
+ movups xmm10,XMMWORD PTR[r11]
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+ pxor xmm15,xmm14
+ movaps xmm11,xmm10
+DB 102,15,56,220,249
+ movups xmm1,XMMWORD PTR[((-64))+rcx]
+
+ movdqa xmm14,xmm9
+DB 102,15,56,220,208
+ paddd xmm9,xmm9
+ pxor xmm10,xmm15
+DB 102,15,56,220,216
+ psrad xmm14,31
+ paddq xmm15,xmm15
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+ pand xmm14,xmm8
+ movaps xmm12,xmm11
+DB 102,15,56,220,240
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm9
+DB 102,15,56,220,248
+ movups xmm0,XMMWORD PTR[((-48))+rcx]
+
+ paddd xmm9,xmm9
+DB 102,15,56,220,209
+ pxor xmm11,xmm15
+ psrad xmm14,31
+DB 102,15,56,220,217
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ movdqa XMMWORD PTR[48+rsp],xmm13
+ pxor xmm15,xmm14
+DB 102,15,56,220,241
+ movaps xmm13,xmm12
+ movdqa xmm14,xmm9
+DB 102,15,56,220,249
+ movups xmm1,XMMWORD PTR[((-32))+rcx]
+
+ paddd xmm9,xmm9
+DB 102,15,56,220,208
+ pxor xmm12,xmm15
+ psrad xmm14,31
+DB 102,15,56,220,216
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+DB 102,15,56,220,240
+ pxor xmm15,xmm14
+ movaps xmm14,xmm13
+DB 102,15,56,220,248
+
+ movdqa xmm0,xmm9
+ paddd xmm9,xmm9
+DB 102,15,56,220,209
+ pxor xmm13,xmm15
+ psrad xmm0,31
+DB 102,15,56,220,217
+ paddq xmm15,xmm15
+ pand xmm0,xmm8
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ pxor xmm15,xmm0
+ movups xmm0,XMMWORD PTR[r11]
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+ movups xmm1,XMMWORD PTR[16+r11]
+
+ pxor xmm14,xmm15
+DB 102,15,56,221,84,36,0
+ psrad xmm9,31
+ paddq xmm15,xmm15
+DB 102,15,56,221,92,36,16
+DB 102,15,56,221,100,36,32
+ pand xmm9,xmm8
+ mov rax,r10
+DB 102,15,56,221,108,36,48
+DB 102,15,56,221,116,36,64
+DB 102,15,56,221,124,36,80
+ pxor xmm15,xmm9
+
+ lea rsi,QWORD PTR[96+rsi]
+ movups XMMWORD PTR[(-96)+rsi],xmm2
+ movups XMMWORD PTR[(-80)+rsi],xmm3
+ movups XMMWORD PTR[(-64)+rsi],xmm4
+ movups XMMWORD PTR[(-48)+rsi],xmm5
+ movups XMMWORD PTR[(-32)+rsi],xmm6
+ movups XMMWORD PTR[(-16)+rsi],xmm7
+ sub rdx,16*6
+ jnc $L$xts_enc_grandloop
+
+ mov eax,16+96
+ sub eax,r10d
+ mov rcx,r11
+ shr eax,4
+
+$L$xts_enc_short::
+ mov r10d,eax
+ pxor xmm10,xmm0
+ add rdx,16*6
+ jz $L$xts_enc_done
+
+ pxor xmm11,xmm0
+ cmp rdx,020h
+ jb $L$xts_enc_one
+ pxor xmm12,xmm0
+ je $L$xts_enc_two
+
+ pxor xmm13,xmm0
+ cmp rdx,040h
+ jb $L$xts_enc_three
+ pxor xmm14,xmm0
+ je $L$xts_enc_four
+
+ movdqu xmm2,XMMWORD PTR[rdi]
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ pxor xmm3,xmm11
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ lea rdi,QWORD PTR[80+rdi]
+ pxor xmm4,xmm12
+ pxor xmm5,xmm13
+ pxor xmm6,xmm14
+
+ call _aesni_encrypt6
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm15
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movdqu XMMWORD PTR[rsi],xmm2
+ xorps xmm5,xmm13
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ xorps xmm6,xmm14
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ movdqu XMMWORD PTR[48+rsi],xmm5
+ movdqu XMMWORD PTR[64+rsi],xmm6
+ lea rsi,QWORD PTR[80+rsi]
+ jmp $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_one::
+ movups xmm2,XMMWORD PTR[rdi]
+ lea rdi,QWORD PTR[16+rdi]
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_9::
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_9
+DB 102,15,56,221,209
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm11
+ movups XMMWORD PTR[rsi],xmm2
+ lea rsi,QWORD PTR[16+rsi]
+ jmp $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_two::
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ lea rdi,QWORD PTR[32+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+
+ call _aesni_encrypt2
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm12
+ xorps xmm3,xmm11
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ lea rsi,QWORD PTR[32+rsi]
+ jmp $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_three::
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movups xmm4,XMMWORD PTR[32+rdi]
+ lea rdi,QWORD PTR[48+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+
+ call _aesni_encrypt3
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm13
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ lea rsi,QWORD PTR[48+rsi]
+ jmp $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_four::
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movups xmm4,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm10
+ movups xmm5,XMMWORD PTR[48+rdi]
+ lea rdi,QWORD PTR[64+rdi]
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ xorps xmm5,xmm13
+
+ call _aesni_encrypt4
+
+ pxor xmm2,xmm10
+ movdqa xmm10,xmm14
+ pxor xmm3,xmm11
+ pxor xmm4,xmm12
+ movdqu XMMWORD PTR[rsi],xmm2
+ pxor xmm5,xmm13
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ movdqu XMMWORD PTR[48+rsi],xmm5
+ lea rsi,QWORD PTR[64+rsi]
+ jmp $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_done::
+ and r9,15
+ jz $L$xts_enc_ret
+ mov rdx,r9
+
+$L$xts_enc_steal::
+ movzx eax,BYTE PTR[rdi]
+ movzx ecx,BYTE PTR[((-16))+rsi]
+ lea rdi,QWORD PTR[1+rdi]
+ mov BYTE PTR[((-16))+rsi],al
+ mov BYTE PTR[rsi],cl
+ lea rsi,QWORD PTR[1+rsi]
+ sub rdx,1
+ jnz $L$xts_enc_steal
+
+ sub rsi,r9
+ mov rcx,r11
+ mov eax,r10d
+
+ movups xmm2,XMMWORD PTR[((-16))+rsi]
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_10::
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_10
+DB 102,15,56,221,209
+ xorps xmm2,xmm10
+ movups XMMWORD PTR[(-16)+rsi],xmm2
+
+$L$xts_enc_ret::
+ movaps xmm6,XMMWORD PTR[((-160))+rbp]
+ movaps xmm7,XMMWORD PTR[((-144))+rbp]
+ movaps xmm8,XMMWORD PTR[((-128))+rbp]
+ movaps xmm9,XMMWORD PTR[((-112))+rbp]
+ movaps xmm10,XMMWORD PTR[((-96))+rbp]
+ movaps xmm11,XMMWORD PTR[((-80))+rbp]
+ movaps xmm12,XMMWORD PTR[((-64))+rbp]
+ movaps xmm13,XMMWORD PTR[((-48))+rbp]
+ movaps xmm14,XMMWORD PTR[((-32))+rbp]
+ movaps xmm15,XMMWORD PTR[((-16))+rbp]
+ lea rsp,QWORD PTR[rbp]
+ pop rbp
+$L$xts_enc_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_xts_encrypt::
+aesni_xts_encrypt ENDP
+PUBLIC aesni_xts_decrypt
+
+ALIGN 16
+aesni_xts_decrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_xts_decrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ lea rax,QWORD PTR[rsp]
+ push rbp
+ sub rsp,272
+ and rsp,-16
+ movaps XMMWORD PTR[(-168)+rax],xmm6
+ movaps XMMWORD PTR[(-152)+rax],xmm7
+ movaps XMMWORD PTR[(-136)+rax],xmm8
+ movaps XMMWORD PTR[(-120)+rax],xmm9
+ movaps XMMWORD PTR[(-104)+rax],xmm10
+ movaps XMMWORD PTR[(-88)+rax],xmm11
+ movaps XMMWORD PTR[(-72)+rax],xmm12
+ movaps XMMWORD PTR[(-56)+rax],xmm13
+ movaps XMMWORD PTR[(-40)+rax],xmm14
+ movaps XMMWORD PTR[(-24)+rax],xmm15
+$L$xts_dec_body::
+ lea rbp,QWORD PTR[((-8))+rax]
+ movups xmm2,XMMWORD PTR[r9]
+ mov eax,DWORD PTR[240+r8]
+ mov r10d,DWORD PTR[240+rcx]
+ movups xmm0,XMMWORD PTR[r8]
+ movups xmm1,XMMWORD PTR[16+r8]
+ lea r8,QWORD PTR[32+r8]
+ xorps xmm2,xmm0
+$L$oop_enc1_11::
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD PTR[r8]
+ lea r8,QWORD PTR[16+r8]
+ jnz $L$oop_enc1_11
+DB 102,15,56,221,209
+ xor eax,eax
+ test rdx,15
+ setnz al
+ shl rax,4
+ sub rdx,rax
+
+ movups xmm0,XMMWORD PTR[rcx]
+ mov r11,rcx
+ mov eax,r10d
+ shl r10d,4
+ mov r9,rdx
+ and rdx,-16
+
+ movups xmm1,XMMWORD PTR[16+r10*1+rcx]
+
+ movdqa xmm8,XMMWORD PTR[$L$xts_magic]
+ movdqa xmm15,xmm2
+ pshufd xmm9,xmm2,05fh
+ pxor xmm1,xmm0
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+ movdqa xmm10,xmm15
+ psrad xmm14,31
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+ pxor xmm10,xmm0
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+ movdqa xmm11,xmm15
+ psrad xmm14,31
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+ pxor xmm11,xmm0
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+ movdqa xmm12,xmm15
+ psrad xmm14,31
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+ pxor xmm12,xmm0
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+ movdqa xmm13,xmm15
+ psrad xmm14,31
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+ pxor xmm13,xmm0
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm15
+ psrad xmm9,31
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pxor xmm14,xmm0
+ pxor xmm15,xmm9
+ movaps XMMWORD PTR[96+rsp],xmm1
+
+ sub rdx,16*6
+ jc $L$xts_dec_short
+
+ mov eax,16+96
+ lea rcx,QWORD PTR[32+r10*1+r11]
+ sub rax,r10
+ movups xmm1,XMMWORD PTR[16+r11]
+ mov r10,rax
+ lea r8,QWORD PTR[$L$xts_magic]
+ jmp $L$xts_dec_grandloop
+
+ALIGN 32
+$L$xts_dec_grandloop::
+ movdqu xmm2,XMMWORD PTR[rdi]
+ movdqa xmm8,xmm0
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ pxor xmm3,xmm11
+DB 102,15,56,222,209
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ pxor xmm4,xmm12
+DB 102,15,56,222,217
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ pxor xmm5,xmm13
+DB 102,15,56,222,225
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ pxor xmm8,xmm15
+ movdqa xmm9,XMMWORD PTR[96+rsp]
+ pxor xmm6,xmm14
+DB 102,15,56,222,233
+ movups xmm0,XMMWORD PTR[32+r11]
+ lea rdi,QWORD PTR[96+rdi]
+ pxor xmm7,xmm8
+
+ pxor xmm10,xmm9
+DB 102,15,56,222,241
+ pxor xmm11,xmm9
+ movdqa XMMWORD PTR[rsp],xmm10
+DB 102,15,56,222,249
+ movups xmm1,XMMWORD PTR[48+r11]
+ pxor xmm12,xmm9
+
+DB 102,15,56,222,208
+ pxor xmm13,xmm9
+ movdqa XMMWORD PTR[16+rsp],xmm11
+DB 102,15,56,222,216
+ pxor xmm14,xmm9
+ movdqa XMMWORD PTR[32+rsp],xmm12
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+ pxor xmm8,xmm9
+ movdqa XMMWORD PTR[64+rsp],xmm14
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+ movups xmm0,XMMWORD PTR[64+r11]
+ movdqa XMMWORD PTR[80+rsp],xmm8
+ pshufd xmm9,xmm15,05fh
+ jmp $L$xts_dec_loop6
+ALIGN 32
+$L$xts_dec_loop6::
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+ movups xmm1,XMMWORD PTR[((-64))+rax*1+rcx]
+ add rax,32
+
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+ movups xmm0,XMMWORD PTR[((-80))+rax*1+rcx]
+ jnz $L$xts_dec_loop6
+
+ movdqa xmm8,XMMWORD PTR[r8]
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+DB 102,15,56,222,209
+ paddq xmm15,xmm15
+ psrad xmm14,31
+DB 102,15,56,222,217
+ pand xmm14,xmm8
+ movups xmm10,XMMWORD PTR[r11]
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+ pxor xmm15,xmm14
+ movaps xmm11,xmm10
+DB 102,15,56,222,249
+ movups xmm1,XMMWORD PTR[((-64))+rcx]
+
+ movdqa xmm14,xmm9
+DB 102,15,56,222,208
+ paddd xmm9,xmm9
+ pxor xmm10,xmm15
+DB 102,15,56,222,216
+ psrad xmm14,31
+ paddq xmm15,xmm15
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+ pand xmm14,xmm8
+ movaps xmm12,xmm11
+DB 102,15,56,222,240
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm9
+DB 102,15,56,222,248
+ movups xmm0,XMMWORD PTR[((-48))+rcx]
+
+ paddd xmm9,xmm9
+DB 102,15,56,222,209
+ pxor xmm11,xmm15
+ psrad xmm14,31
+DB 102,15,56,222,217
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+ movdqa XMMWORD PTR[48+rsp],xmm13
+ pxor xmm15,xmm14
+DB 102,15,56,222,241
+ movaps xmm13,xmm12
+ movdqa xmm14,xmm9
+DB 102,15,56,222,249
+ movups xmm1,XMMWORD PTR[((-32))+rcx]
+
+ paddd xmm9,xmm9
+DB 102,15,56,222,208
+ pxor xmm12,xmm15
+ psrad xmm14,31
+DB 102,15,56,222,216
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+ pxor xmm15,xmm14
+ movaps xmm14,xmm13
+DB 102,15,56,222,248
+
+ movdqa xmm0,xmm9
+ paddd xmm9,xmm9
+DB 102,15,56,222,209
+ pxor xmm13,xmm15
+ psrad xmm0,31
+DB 102,15,56,222,217
+ paddq xmm15,xmm15
+ pand xmm0,xmm8
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+ pxor xmm15,xmm0
+ movups xmm0,XMMWORD PTR[r11]
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+ movups xmm1,XMMWORD PTR[16+r11]
+
+ pxor xmm14,xmm15
+DB 102,15,56,223,84,36,0
+ psrad xmm9,31
+ paddq xmm15,xmm15
+DB 102,15,56,223,92,36,16
+DB 102,15,56,223,100,36,32
+ pand xmm9,xmm8
+ mov rax,r10
+DB 102,15,56,223,108,36,48
+DB 102,15,56,223,116,36,64
+DB 102,15,56,223,124,36,80
+ pxor xmm15,xmm9
+
+ lea rsi,QWORD PTR[96+rsi]
+ movups XMMWORD PTR[(-96)+rsi],xmm2
+ movups XMMWORD PTR[(-80)+rsi],xmm3
+ movups XMMWORD PTR[(-64)+rsi],xmm4
+ movups XMMWORD PTR[(-48)+rsi],xmm5
+ movups XMMWORD PTR[(-32)+rsi],xmm6
+ movups XMMWORD PTR[(-16)+rsi],xmm7
+ sub rdx,16*6
+ jnc $L$xts_dec_grandloop
+
+ mov eax,16+96
+ sub eax,r10d
+ mov rcx,r11
+ shr eax,4
+
+$L$xts_dec_short::
+ mov r10d,eax
+ pxor xmm10,xmm0
+ pxor xmm11,xmm0
+ add rdx,16*6
+ jz $L$xts_dec_done
+
+ pxor xmm12,xmm0
+ cmp rdx,020h
+ jb $L$xts_dec_one
+ pxor xmm13,xmm0
+ je $L$xts_dec_two
+
+ pxor xmm14,xmm0
+ cmp rdx,040h
+ jb $L$xts_dec_three
+ je $L$xts_dec_four
+
+ movdqu xmm2,XMMWORD PTR[rdi]
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ pxor xmm3,xmm11
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ lea rdi,QWORD PTR[80+rdi]
+ pxor xmm4,xmm12
+ pxor xmm5,xmm13
+ pxor xmm6,xmm14
+
+ call _aesni_decrypt6
+
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movdqu XMMWORD PTR[rsi],xmm2
+ xorps xmm5,xmm13
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ xorps xmm6,xmm14
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ pxor xmm14,xmm14
+ movdqu XMMWORD PTR[48+rsi],xmm5
+ pcmpgtd xmm14,xmm15
+ movdqu XMMWORD PTR[64+rsi],xmm6
+ lea rsi,QWORD PTR[80+rsi]
+ pshufd xmm11,xmm14,013h
+ and r9,15
+ jz $L$xts_dec_ret
+
+ movdqa xmm10,xmm15
+ paddq xmm15,xmm15
+ pand xmm11,xmm8
+ pxor xmm11,xmm15
+ jmp $L$xts_dec_done2
+
+ALIGN 16
+$L$xts_dec_one::
+ movups xmm2,XMMWORD PTR[rdi]
+ lea rdi,QWORD PTR[16+rdi]
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_12::
+DB 102,15,56,222,209
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_12
+DB 102,15,56,223,209
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm11
+ movups XMMWORD PTR[rsi],xmm2
+ movdqa xmm11,xmm12
+ lea rsi,QWORD PTR[16+rsi]
+ jmp $L$xts_dec_done
+
+ALIGN 16
+$L$xts_dec_two::
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ lea rdi,QWORD PTR[32+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+
+ call _aesni_decrypt2
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm12
+ xorps xmm3,xmm11
+ movdqa xmm11,xmm13
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ lea rsi,QWORD PTR[32+rsi]
+ jmp $L$xts_dec_done
+
+ALIGN 16
+$L$xts_dec_three::
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movups xmm4,XMMWORD PTR[32+rdi]
+ lea rdi,QWORD PTR[48+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+
+ call _aesni_decrypt3
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm13
+ xorps xmm3,xmm11
+ movdqa xmm11,xmm14
+ xorps xmm4,xmm12
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ lea rsi,QWORD PTR[48+rsi]
+ jmp $L$xts_dec_done
+
+ALIGN 16
+$L$xts_dec_four::
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movups xmm4,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm10
+ movups xmm5,XMMWORD PTR[48+rdi]
+ lea rdi,QWORD PTR[64+rdi]
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ xorps xmm5,xmm13
+
+ call _aesni_decrypt4
+
+ pxor xmm2,xmm10
+ movdqa xmm10,xmm14
+ pxor xmm3,xmm11
+ movdqa xmm11,xmm15
+ pxor xmm4,xmm12
+ movdqu XMMWORD PTR[rsi],xmm2
+ pxor xmm5,xmm13
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ movdqu XMMWORD PTR[48+rsi],xmm5
+ lea rsi,QWORD PTR[64+rsi]
+ jmp $L$xts_dec_done
+
+ALIGN 16
+$L$xts_dec_done::
+ and r9,15
+ jz $L$xts_dec_ret
+$L$xts_dec_done2::
+ mov rdx,r9
+ mov rcx,r11
+ mov eax,r10d
+
+ movups xmm2,XMMWORD PTR[rdi]
+ xorps xmm2,xmm11
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_13::
+DB 102,15,56,222,209
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_13
+DB 102,15,56,223,209
+ xorps xmm2,xmm11
+ movups XMMWORD PTR[rsi],xmm2
+
+$L$xts_dec_steal::
+ movzx eax,BYTE PTR[16+rdi]
+ movzx ecx,BYTE PTR[rsi]
+ lea rdi,QWORD PTR[1+rdi]
+ mov BYTE PTR[rsi],al
+ mov BYTE PTR[16+rsi],cl
+ lea rsi,QWORD PTR[1+rsi]
+ sub rdx,1
+ jnz $L$xts_dec_steal
+
+ sub rsi,r9
+ mov rcx,r11
+ mov eax,r10d
+
+ movups xmm2,XMMWORD PTR[rsi]
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_14::
+DB 102,15,56,222,209
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_14
+DB 102,15,56,223,209
+ xorps xmm2,xmm10
+ movups XMMWORD PTR[rsi],xmm2
+
+$L$xts_dec_ret::
+ movaps xmm6,XMMWORD PTR[((-160))+rbp]
+ movaps xmm7,XMMWORD PTR[((-144))+rbp]
+ movaps xmm8,XMMWORD PTR[((-128))+rbp]
+ movaps xmm9,XMMWORD PTR[((-112))+rbp]
+ movaps xmm10,XMMWORD PTR[((-96))+rbp]
+ movaps xmm11,XMMWORD PTR[((-80))+rbp]
+ movaps xmm12,XMMWORD PTR[((-64))+rbp]
+ movaps xmm13,XMMWORD PTR[((-48))+rbp]
+ movaps xmm14,XMMWORD PTR[((-32))+rbp]
+ movaps xmm15,XMMWORD PTR[((-16))+rbp]
+ lea rsp,QWORD PTR[rbp]
+ pop rbp
+$L$xts_dec_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_xts_decrypt::
+aesni_xts_decrypt ENDP
+PUBLIC aesni_cbc_encrypt
+
+ALIGN 16
+aesni_cbc_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_cbc_encrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ test rdx,rdx
+ jz $L$cbc_ret
+
+ mov r10d,DWORD PTR[240+rcx]
+ mov r11,rcx
+ test r9d,r9d
+ jz $L$cbc_decrypt
+
+ movups xmm2,XMMWORD PTR[r8]
+ mov eax,r10d
+ cmp rdx,16
+ jb $L$cbc_enc_tail
+ sub rdx,16
+ jmp $L$cbc_enc_loop
+ALIGN 16
+$L$cbc_enc_loop::
+ movups xmm3,XMMWORD PTR[rdi]
+ lea rdi,QWORD PTR[16+rdi]
+
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ xorps xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm3
+$L$oop_enc1_15::
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_15
+DB 102,15,56,221,209
+ mov eax,r10d
+ mov rcx,r11
+ movups XMMWORD PTR[rsi],xmm2
+ lea rsi,QWORD PTR[16+rsi]
+ sub rdx,16
+ jnc $L$cbc_enc_loop
+ add rdx,16
+ jnz $L$cbc_enc_tail
+ movups XMMWORD PTR[r8],xmm2
+ jmp $L$cbc_ret
+
+$L$cbc_enc_tail::
+ mov rcx,rdx
+ xchg rsi,rdi
+ DD 09066A4F3h
+ mov ecx,16
+ sub rcx,rdx
+ xor eax,eax
+ DD 09066AAF3h
+ lea rdi,QWORD PTR[((-16))+rdi]
+ mov eax,r10d
+ mov rsi,rdi
+ mov rcx,r11
+ xor rdx,rdx
+ jmp $L$cbc_enc_loop
+
+ALIGN 16
+$L$cbc_decrypt::
+ lea rax,QWORD PTR[rsp]
+ push rbp
+ sub rsp,176
+ and rsp,-16
+ movaps XMMWORD PTR[16+rsp],xmm6
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+$L$cbc_decrypt_body::
+ lea rbp,QWORD PTR[((-8))+rax]
+ movups xmm10,XMMWORD PTR[r8]
+ mov eax,r10d
+ cmp rdx,050h
+ jbe $L$cbc_dec_tail
+
+ movups xmm0,XMMWORD PTR[rcx]
+ movdqu xmm2,XMMWORD PTR[rdi]
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movdqa xmm11,xmm2
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movdqa xmm12,xmm3
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movdqa xmm13,xmm4
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movdqa xmm14,xmm5
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movdqa xmm15,xmm6
+ mov r9d,DWORD PTR[((OPENSSL_ia32cap_P+4))]
+ cmp rdx,070h
+ jbe $L$cbc_dec_six_or_seven
+
+ and r9d,71303168
+ sub rdx,050h
+ cmp r9d,4194304
+ je $L$cbc_dec_loop6_enter
+ sub rdx,020h
+ lea rcx,QWORD PTR[112+rcx]
+ jmp $L$cbc_dec_loop8_enter
+ALIGN 16
+$L$cbc_dec_loop8::
+ movups XMMWORD PTR[rsi],xmm9
+ lea rsi,QWORD PTR[16+rsi]
+$L$cbc_dec_loop8_enter::
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ pxor xmm2,xmm0
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ pxor xmm3,xmm0
+ movups xmm1,XMMWORD PTR[((16-112))+rcx]
+ pxor xmm4,xmm0
+ xor r11,r11
+ cmp rdx,070h
+ pxor xmm5,xmm0
+ pxor xmm6,xmm0
+ pxor xmm7,xmm0
+ pxor xmm8,xmm0
+
+DB 102,15,56,222,209
+ pxor xmm9,xmm0
+ movups xmm0,XMMWORD PTR[((32-112))+rcx]
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+ setnc r11b
+ shl r11,7
+DB 102,68,15,56,222,201
+ add r11,rdi
+ movups xmm1,XMMWORD PTR[((48-112))+rcx]
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+DB 102,68,15,56,222,192
+DB 102,68,15,56,222,200
+ movups xmm0,XMMWORD PTR[((64-112))+rcx]
+ nop
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+ movups xmm1,XMMWORD PTR[((80-112))+rcx]
+ nop
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+DB 102,68,15,56,222,192
+DB 102,68,15,56,222,200
+ movups xmm0,XMMWORD PTR[((96-112))+rcx]
+ nop
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+ movups xmm1,XMMWORD PTR[((112-112))+rcx]
+ nop
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+DB 102,68,15,56,222,192
+DB 102,68,15,56,222,200
+ movups xmm0,XMMWORD PTR[((128-112))+rcx]
+ nop
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+ movups xmm1,XMMWORD PTR[((144-112))+rcx]
+ cmp eax,11
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+DB 102,68,15,56,222,192
+DB 102,68,15,56,222,200
+ movups xmm0,XMMWORD PTR[((160-112))+rcx]
+ jb $L$cbc_dec_done
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+ movups xmm1,XMMWORD PTR[((176-112))+rcx]
+ nop
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+DB 102,68,15,56,222,192
+DB 102,68,15,56,222,200
+ movups xmm0,XMMWORD PTR[((192-112))+rcx]
+ je $L$cbc_dec_done
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+ movups xmm1,XMMWORD PTR[((208-112))+rcx]
+ nop
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+DB 102,68,15,56,222,192
+DB 102,68,15,56,222,200
+ movups xmm0,XMMWORD PTR[((224-112))+rcx]
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_done::
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+ pxor xmm10,xmm0
+ pxor xmm11,xmm0
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+ pxor xmm12,xmm0
+ pxor xmm13,xmm0
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+ pxor xmm14,xmm0
+ pxor xmm15,xmm0
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+ movdqu xmm1,XMMWORD PTR[80+rdi]
+
+DB 102,65,15,56,223,210
+ movdqu xmm10,XMMWORD PTR[96+rdi]
+ pxor xmm1,xmm0
+DB 102,65,15,56,223,219
+ pxor xmm10,xmm0
+ movdqu xmm0,XMMWORD PTR[112+rdi]
+DB 102,65,15,56,223,228
+ lea rdi,QWORD PTR[128+rdi]
+ movdqu xmm11,XMMWORD PTR[r11]
+DB 102,65,15,56,223,237
+DB 102,65,15,56,223,246
+ movdqu xmm12,XMMWORD PTR[16+r11]
+ movdqu xmm13,XMMWORD PTR[32+r11]
+DB 102,65,15,56,223,255
+DB 102,68,15,56,223,193
+ movdqu xmm14,XMMWORD PTR[48+r11]
+ movdqu xmm15,XMMWORD PTR[64+r11]
+DB 102,69,15,56,223,202
+ movdqa xmm10,xmm0
+ movdqu xmm1,XMMWORD PTR[80+r11]
+ movups xmm0,XMMWORD PTR[((-112))+rcx]
+
+ movups XMMWORD PTR[rsi],xmm2
+ movdqa xmm2,xmm11
+ movups XMMWORD PTR[16+rsi],xmm3
+ movdqa xmm3,xmm12
+ movups XMMWORD PTR[32+rsi],xmm4
+ movdqa xmm4,xmm13
+ movups XMMWORD PTR[48+rsi],xmm5
+ movdqa xmm5,xmm14
+ movups XMMWORD PTR[64+rsi],xmm6
+ movdqa xmm6,xmm15
+ movups XMMWORD PTR[80+rsi],xmm7
+ movdqa xmm7,xmm1
+ movups XMMWORD PTR[96+rsi],xmm8
+ lea rsi,QWORD PTR[112+rsi]
+
+ sub rdx,080h
+ ja $L$cbc_dec_loop8
+
+ movaps xmm2,xmm9
+ lea rcx,QWORD PTR[((-112))+rcx]
+ add rdx,070h
+ jle $L$cbc_dec_tail_collected
+ movups XMMWORD PTR[rsi],xmm9
+ lea rsi,QWORD PTR[16+rsi]
+ cmp rdx,050h
+ jbe $L$cbc_dec_tail
+
+ movaps xmm2,xmm11
+$L$cbc_dec_six_or_seven::
+ cmp rdx,060h
+ ja $L$cbc_dec_seven
+
+ movaps xmm8,xmm7
+ call _aesni_decrypt6
+ pxor xmm2,xmm10
+ movaps xmm10,xmm8
+ pxor xmm3,xmm11
+ movdqu XMMWORD PTR[rsi],xmm2
+ pxor xmm4,xmm12
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ pxor xmm5,xmm13
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ pxor xmm6,xmm14
+ movdqu XMMWORD PTR[48+rsi],xmm5
+ pxor xmm7,xmm15
+ movdqu XMMWORD PTR[64+rsi],xmm6
+ lea rsi,QWORD PTR[80+rsi]
+ movdqa xmm2,xmm7
+ jmp $L$cbc_dec_tail_collected
+
+ALIGN 16
+$L$cbc_dec_seven::
+ movups xmm8,XMMWORD PTR[96+rdi]
+ xorps xmm9,xmm9
+ call _aesni_decrypt8
+ movups xmm9,XMMWORD PTR[80+rdi]
+ pxor xmm2,xmm10
+ movups xmm10,XMMWORD PTR[96+rdi]
+ pxor xmm3,xmm11
+ movdqu XMMWORD PTR[rsi],xmm2
+ pxor xmm4,xmm12
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ pxor xmm5,xmm13
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ pxor xmm6,xmm14
+ movdqu XMMWORD PTR[48+rsi],xmm5
+ pxor xmm7,xmm15
+ movdqu XMMWORD PTR[64+rsi],xmm6
+ pxor xmm8,xmm9
+ movdqu XMMWORD PTR[80+rsi],xmm7
+ lea rsi,QWORD PTR[96+rsi]
+ movdqa xmm2,xmm8
+ jmp $L$cbc_dec_tail_collected
+
+ALIGN 16
+$L$cbc_dec_loop6::
+ movups XMMWORD PTR[rsi],xmm7
+ lea rsi,QWORD PTR[16+rsi]
+ movdqu xmm2,XMMWORD PTR[rdi]
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movdqa xmm11,xmm2
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movdqa xmm12,xmm3
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movdqa xmm13,xmm4
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movdqa xmm14,xmm5
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movdqa xmm15,xmm6
+$L$cbc_dec_loop6_enter::
+ lea rdi,QWORD PTR[96+rdi]
+ movdqa xmm8,xmm7
+
+ call _aesni_decrypt6
+
+ pxor xmm2,xmm10
+ movdqa xmm10,xmm8
+ pxor xmm3,xmm11
+ movdqu XMMWORD PTR[rsi],xmm2
+ pxor xmm4,xmm12
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ pxor xmm5,xmm13
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ pxor xmm6,xmm14
+ mov rcx,r11
+ movdqu XMMWORD PTR[48+rsi],xmm5
+ pxor xmm7,xmm15
+ mov eax,r10d
+ movdqu XMMWORD PTR[64+rsi],xmm6
+ lea rsi,QWORD PTR[80+rsi]
+ sub rdx,060h
+ ja $L$cbc_dec_loop6
+
+ movdqa xmm2,xmm7
+ add rdx,050h
+ jle $L$cbc_dec_tail_collected
+ movups XMMWORD PTR[rsi],xmm7
+ lea rsi,QWORD PTR[16+rsi]
+
+$L$cbc_dec_tail::
+ movups xmm2,XMMWORD PTR[rdi]
+ sub rdx,010h
+ jbe $L$cbc_dec_one
+
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movaps xmm11,xmm2
+ sub rdx,010h
+ jbe $L$cbc_dec_two
+
+ movups xmm4,XMMWORD PTR[32+rdi]
+ movaps xmm12,xmm3
+ sub rdx,010h
+ jbe $L$cbc_dec_three
+
+ movups xmm5,XMMWORD PTR[48+rdi]
+ movaps xmm13,xmm4
+ sub rdx,010h
+ jbe $L$cbc_dec_four
+
+ movups xmm6,XMMWORD PTR[64+rdi]
+ movaps xmm14,xmm5
+ movaps xmm15,xmm6
+ xorps xmm7,xmm7
+ call _aesni_decrypt6
+ pxor xmm2,xmm10
+ movaps xmm10,xmm15
+ pxor xmm3,xmm11
+ movdqu XMMWORD PTR[rsi],xmm2
+ pxor xmm4,xmm12
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ pxor xmm5,xmm13
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ pxor xmm6,xmm14
+ movdqu XMMWORD PTR[48+rsi],xmm5
+ lea rsi,QWORD PTR[64+rsi]
+ movdqa xmm2,xmm6
+ sub rdx,010h
+ jmp $L$cbc_dec_tail_collected
+
+ALIGN 16
+$L$cbc_dec_one::
+ movaps xmm11,xmm2
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_16::
+DB 102,15,56,222,209
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_16
+DB 102,15,56,223,209
+ xorps xmm2,xmm10
+ movaps xmm10,xmm11
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_two::
+ movaps xmm12,xmm3
+ call _aesni_decrypt2
+ pxor xmm2,xmm10
+ movaps xmm10,xmm12
+ pxor xmm3,xmm11
+ movdqu XMMWORD PTR[rsi],xmm2
+ movdqa xmm2,xmm3
+ lea rsi,QWORD PTR[16+rsi]
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_three::
+ movaps xmm13,xmm4
+ call _aesni_decrypt3
+ pxor xmm2,xmm10
+ movaps xmm10,xmm13
+ pxor xmm3,xmm11
+ movdqu XMMWORD PTR[rsi],xmm2
+ pxor xmm4,xmm12
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ movdqa xmm2,xmm4
+ lea rsi,QWORD PTR[32+rsi]
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_four::
+ movaps xmm14,xmm5
+ call _aesni_decrypt4
+ pxor xmm2,xmm10
+ movaps xmm10,xmm14
+ pxor xmm3,xmm11
+ movdqu XMMWORD PTR[rsi],xmm2
+ pxor xmm4,xmm12
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ pxor xmm5,xmm13
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ movdqa xmm2,xmm5
+ lea rsi,QWORD PTR[48+rsi]
+ jmp $L$cbc_dec_tail_collected
+
+ALIGN 16
+$L$cbc_dec_tail_collected::
+ movups XMMWORD PTR[r8],xmm10
+ and rdx,15
+ jnz $L$cbc_dec_tail_partial
+ movups XMMWORD PTR[rsi],xmm2
+ jmp $L$cbc_dec_ret
+ALIGN 16
+$L$cbc_dec_tail_partial::
+ movaps XMMWORD PTR[rsp],xmm2
+ mov rcx,16
+ mov rdi,rsi
+ sub rcx,rdx
+ lea rsi,QWORD PTR[rsp]
+ DD 09066A4F3h
+
+$L$cbc_dec_ret::
+ movaps xmm6,XMMWORD PTR[16+rsp]
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[rbp]
+ pop rbp
+$L$cbc_ret::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_cbc_encrypt::
+aesni_cbc_encrypt ENDP
+PUBLIC aesni_set_decrypt_key
+
+ALIGN 16
+aesni_set_decrypt_key PROC PUBLIC
+DB 048h,083h,0ECh,008h
+ call __aesni_set_encrypt_key
+ shl edx,4
+ test eax,eax
+ jnz $L$dec_key_ret
+ lea rcx,QWORD PTR[16+rdx*1+r8]
+
+ movups xmm0,XMMWORD PTR[r8]
+ movups xmm1,XMMWORD PTR[rcx]
+ movups XMMWORD PTR[rcx],xmm0
+ movups XMMWORD PTR[r8],xmm1
+ lea r8,QWORD PTR[16+r8]
+ lea rcx,QWORD PTR[((-16))+rcx]
+
+$L$dec_key_inverse::
+ movups xmm0,XMMWORD PTR[r8]
+ movups xmm1,XMMWORD PTR[rcx]
+DB 102,15,56,219,192
+DB 102,15,56,219,201
+ lea r8,QWORD PTR[16+r8]
+ lea rcx,QWORD PTR[((-16))+rcx]
+ movups XMMWORD PTR[16+rcx],xmm0
+ movups XMMWORD PTR[(-16)+r8],xmm1
+ cmp rcx,r8
+ ja $L$dec_key_inverse
+
+ movups xmm0,XMMWORD PTR[r8]
+DB 102,15,56,219,192
+ movups XMMWORD PTR[rcx],xmm0
+$L$dec_key_ret::
+ add rsp,8
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_set_decrypt_key::
+aesni_set_decrypt_key ENDP
+PUBLIC aesni_set_encrypt_key
+
+ALIGN 16
+aesni_set_encrypt_key PROC PUBLIC
+__aesni_set_encrypt_key::
+DB 048h,083h,0ECh,008h
+ mov rax,-1
+ test rcx,rcx
+ jz $L$enc_key_ret
+ test r8,r8
+ jz $L$enc_key_ret
+
+ movups xmm0,XMMWORD PTR[rcx]
+ xorps xmm4,xmm4
+ lea rax,QWORD PTR[16+r8]
+ cmp edx,256
+ je $L$14rounds
+ cmp edx,192
+ je $L$12rounds
+ cmp edx,128
+ jne $L$bad_keybits
+
+$L$10rounds::
+ mov edx,9
+ movups XMMWORD PTR[r8],xmm0
+DB 102,15,58,223,200,1
+ call $L$key_expansion_128_cold
+DB 102,15,58,223,200,2
+ call $L$key_expansion_128
+DB 102,15,58,223,200,4
+ call $L$key_expansion_128
+DB 102,15,58,223,200,8
+ call $L$key_expansion_128
+DB 102,15,58,223,200,16
+ call $L$key_expansion_128
+DB 102,15,58,223,200,32
+ call $L$key_expansion_128
+DB 102,15,58,223,200,64
+ call $L$key_expansion_128
+DB 102,15,58,223,200,128
+ call $L$key_expansion_128
+DB 102,15,58,223,200,27
+ call $L$key_expansion_128
+DB 102,15,58,223,200,54
+ call $L$key_expansion_128
+ movups XMMWORD PTR[rax],xmm0
+ mov DWORD PTR[80+rax],edx
+ xor eax,eax
+ jmp $L$enc_key_ret
+
+ALIGN 16
+$L$12rounds::
+ movq xmm2,QWORD PTR[16+rcx]
+ mov edx,11
+ movups XMMWORD PTR[r8],xmm0
+DB 102,15,58,223,202,1
+ call $L$key_expansion_192a_cold
+DB 102,15,58,223,202,2
+ call $L$key_expansion_192b
+DB 102,15,58,223,202,4
+ call $L$key_expansion_192a
+DB 102,15,58,223,202,8
+ call $L$key_expansion_192b
+DB 102,15,58,223,202,16
+ call $L$key_expansion_192a
+DB 102,15,58,223,202,32
+ call $L$key_expansion_192b
+DB 102,15,58,223,202,64
+ call $L$key_expansion_192a
+DB 102,15,58,223,202,128
+ call $L$key_expansion_192b
+ movups XMMWORD PTR[rax],xmm0
+ mov DWORD PTR[48+rax],edx
+ xor rax,rax
+ jmp $L$enc_key_ret
+
+ALIGN 16
+$L$14rounds::
+ movups xmm2,XMMWORD PTR[16+rcx]
+ mov edx,13
+ lea rax,QWORD PTR[16+rax]
+ movups XMMWORD PTR[r8],xmm0
+ movups XMMWORD PTR[16+r8],xmm2
+DB 102,15,58,223,202,1
+ call $L$key_expansion_256a_cold
+DB 102,15,58,223,200,1
+ call $L$key_expansion_256b
+DB 102,15,58,223,202,2
+ call $L$key_expansion_256a
+DB 102,15,58,223,200,2
+ call $L$key_expansion_256b
+DB 102,15,58,223,202,4
+ call $L$key_expansion_256a
+DB 102,15,58,223,200,4
+ call $L$key_expansion_256b
+DB 102,15,58,223,202,8
+ call $L$key_expansion_256a
+DB 102,15,58,223,200,8
+ call $L$key_expansion_256b
+DB 102,15,58,223,202,16
+ call $L$key_expansion_256a
+DB 102,15,58,223,200,16
+ call $L$key_expansion_256b
+DB 102,15,58,223,202,32
+ call $L$key_expansion_256a
+DB 102,15,58,223,200,32
+ call $L$key_expansion_256b
+DB 102,15,58,223,202,64
+ call $L$key_expansion_256a
+ movups XMMWORD PTR[rax],xmm0
+ mov DWORD PTR[16+rax],edx
+ xor rax,rax
+ jmp $L$enc_key_ret
+
+ALIGN 16
+$L$bad_keybits::
+ mov rax,-2
+$L$enc_key_ret::
+ add rsp,8
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_set_encrypt_key::
+
+ALIGN 16
+$L$key_expansion_128::
+ movups XMMWORD PTR[rax],xmm0
+ lea rax,QWORD PTR[16+rax]
+$L$key_expansion_128_cold::
+ shufps xmm4,xmm0,16
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ xorps xmm0,xmm4
+ shufps xmm1,xmm1,255
+ xorps xmm0,xmm1
+ DB 0F3h,0C3h ;repret
+
+ALIGN 16
+$L$key_expansion_192a::
+ movups XMMWORD PTR[rax],xmm0
+ lea rax,QWORD PTR[16+rax]
+$L$key_expansion_192a_cold::
+ movaps xmm5,xmm2
+$L$key_expansion_192b_warm::
+ shufps xmm4,xmm0,16
+ movdqa xmm3,xmm2
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ pslldq xmm3,4
+ xorps xmm0,xmm4
+ pshufd xmm1,xmm1,85
+ pxor xmm2,xmm3
+ pxor xmm0,xmm1
+ pshufd xmm3,xmm0,255
+ pxor xmm2,xmm3
+ DB 0F3h,0C3h ;repret
+
+ALIGN 16
+$L$key_expansion_192b::
+ movaps xmm3,xmm0
+ shufps xmm5,xmm0,68
+ movups XMMWORD PTR[rax],xmm5
+ shufps xmm3,xmm2,78
+ movups XMMWORD PTR[16+rax],xmm3
+ lea rax,QWORD PTR[32+rax]
+ jmp $L$key_expansion_192b_warm
+
+ALIGN 16
+$L$key_expansion_256a::
+ movups XMMWORD PTR[rax],xmm2
+ lea rax,QWORD PTR[16+rax]
+$L$key_expansion_256a_cold::
+ shufps xmm4,xmm0,16
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ xorps xmm0,xmm4
+ shufps xmm1,xmm1,255
+ xorps xmm0,xmm1
+ DB 0F3h,0C3h ;repret
+
+ALIGN 16
+$L$key_expansion_256b::
+ movups XMMWORD PTR[rax],xmm0
+ lea rax,QWORD PTR[16+rax]
+
+ shufps xmm4,xmm2,16
+ xorps xmm2,xmm4
+ shufps xmm4,xmm2,140
+ xorps xmm2,xmm4
+ shufps xmm1,xmm1,170
+ xorps xmm2,xmm1
+ DB 0F3h,0C3h ;repret
+aesni_set_encrypt_key ENDP
+
+ALIGN 64
+$L$bswap_mask::
+DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+$L$increment32::
+ DD 6,6,6,0
+$L$increment64::
+ DD 1,0,0,0
+$L$xts_magic::
+ DD 087h,0,1,0
+$L$increment1::
+DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
+
+DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+DB 115,108,46,111,114,103,62,0
+ALIGN 64
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+ecb_ccm64_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_seh_tail
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+
+ lea rsi,QWORD PTR[rax]
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,8
+ DD 0a548f3fch
+ lea rax,QWORD PTR[88+rax]
+
+ jmp $L$common_seh_tail
+ecb_ccm64_se_handler ENDP
+
+
+ALIGN 16
+ctr_xts_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_seh_tail
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+
+ mov rax,QWORD PTR[160+r8]
+ lea rsi,QWORD PTR[((-160))+rax]
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,20
+ DD 0a548f3fch
+
+ jmp $L$common_rbp_tail
+ctr_xts_se_handler ENDP
+
+ALIGN 16
+cbc_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[152+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ lea r10,QWORD PTR[$L$cbc_decrypt]
+ cmp rbx,r10
+ jb $L$common_seh_tail
+
+ lea r10,QWORD PTR[$L$cbc_decrypt_body]
+ cmp rbx,r10
+ jb $L$restore_cbc_rax
+
+ lea r10,QWORD PTR[$L$cbc_ret]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+
+ lea rsi,QWORD PTR[16+rax]
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,20
+ DD 0a548f3fch
+
+$L$common_rbp_tail::
+ mov rax,QWORD PTR[160+r8]
+ mov rbp,QWORD PTR[rax]
+ lea rax,QWORD PTR[8+rax]
+ mov QWORD PTR[160+r8],rbp
+ jmp $L$common_seh_tail
+
+$L$restore_cbc_rax::
+ mov rax,QWORD PTR[120+r8]
+
+$L$common_seh_tail::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+cbc_se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_aesni_ecb_encrypt
+ DD imagerel $L$SEH_end_aesni_ecb_encrypt
+ DD imagerel $L$SEH_info_ecb
+
+ DD imagerel $L$SEH_begin_aesni_ccm64_encrypt_blocks
+ DD imagerel $L$SEH_end_aesni_ccm64_encrypt_blocks
+ DD imagerel $L$SEH_info_ccm64_enc
+
+ DD imagerel $L$SEH_begin_aesni_ccm64_decrypt_blocks
+ DD imagerel $L$SEH_end_aesni_ccm64_decrypt_blocks
+ DD imagerel $L$SEH_info_ccm64_dec
+
+ DD imagerel $L$SEH_begin_aesni_ctr32_encrypt_blocks
+ DD imagerel $L$SEH_end_aesni_ctr32_encrypt_blocks
+ DD imagerel $L$SEH_info_ctr32
+
+ DD imagerel $L$SEH_begin_aesni_xts_encrypt
+ DD imagerel $L$SEH_end_aesni_xts_encrypt
+ DD imagerel $L$SEH_info_xts_enc
+
+ DD imagerel $L$SEH_begin_aesni_xts_decrypt
+ DD imagerel $L$SEH_end_aesni_xts_decrypt
+ DD imagerel $L$SEH_info_xts_dec
+ DD imagerel $L$SEH_begin_aesni_cbc_encrypt
+ DD imagerel $L$SEH_end_aesni_cbc_encrypt
+ DD imagerel $L$SEH_info_cbc
+
+ DD imagerel aesni_set_decrypt_key
+ DD imagerel $L$SEH_end_set_decrypt_key
+ DD imagerel $L$SEH_info_key
+
+ DD imagerel aesni_set_encrypt_key
+ DD imagerel $L$SEH_end_set_encrypt_key
+ DD imagerel $L$SEH_info_key
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_ecb::
+DB 9,0,0,0
+ DD imagerel ecb_ccm64_se_handler
+ DD imagerel $L$ecb_enc_body,imagerel $L$ecb_enc_ret
+$L$SEH_info_ccm64_enc::
+DB 9,0,0,0
+ DD imagerel ecb_ccm64_se_handler
+ DD imagerel $L$ccm64_enc_body,imagerel $L$ccm64_enc_ret
+$L$SEH_info_ccm64_dec::
+DB 9,0,0,0
+ DD imagerel ecb_ccm64_se_handler
+ DD imagerel $L$ccm64_dec_body,imagerel $L$ccm64_dec_ret
+$L$SEH_info_ctr32::
+DB 9,0,0,0
+ DD imagerel ctr_xts_se_handler
+ DD imagerel $L$ctr32_body,imagerel $L$ctr32_epilogue
+$L$SEH_info_xts_enc::
+DB 9,0,0,0
+ DD imagerel ctr_xts_se_handler
+ DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue
+$L$SEH_info_xts_dec::
+DB 9,0,0,0
+ DD imagerel ctr_xts_se_handler
+ DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue
+$L$SEH_info_cbc::
+DB 9,0,0,0
+ DD imagerel cbc_se_handler
+$L$SEH_info_key::
+DB 001h,004h,001h,000h
+DB 004h,002h,000h,000h
+
+.xdata ENDS
+END
diff --git a/win-x86_64/crypto/aes/bsaes-x86_64.asm b/win-x86_64/crypto/aes/bsaes-x86_64.asm
new file mode 100644
index 0000000..3346a7e
--- /dev/null
+++ b/win-x86_64/crypto/aes/bsaes-x86_64.asm
@@ -0,0 +1,2734 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+
+EXTERN asm_AES_encrypt:NEAR
+EXTERN asm_AES_decrypt:NEAR
+
+
+ALIGN 64
+_bsaes_encrypt8 PROC PRIVATE
+ lea r11,QWORD PTR[$L$BS0]
+
+ movdqa xmm8,XMMWORD PTR[rax]
+ lea rax,QWORD PTR[16+rax]
+ movdqa xmm7,XMMWORD PTR[80+r11]
+ pxor xmm15,xmm8
+ pxor xmm0,xmm8
+ pxor xmm1,xmm8
+ pxor xmm2,xmm8
+DB 102,68,15,56,0,255
+DB 102,15,56,0,199
+ pxor xmm3,xmm8
+ pxor xmm4,xmm8
+DB 102,15,56,0,207
+DB 102,15,56,0,215
+ pxor xmm5,xmm8
+ pxor xmm6,xmm8
+DB 102,15,56,0,223
+DB 102,15,56,0,231
+DB 102,15,56,0,239
+DB 102,15,56,0,247
+_bsaes_encrypt8_bitslice::
+ movdqa xmm7,XMMWORD PTR[r11]
+ movdqa xmm8,XMMWORD PTR[16+r11]
+ movdqa xmm9,xmm5
+ psrlq xmm5,1
+ movdqa xmm10,xmm3
+ psrlq xmm3,1
+ pxor xmm5,xmm6
+ pxor xmm3,xmm4
+ pand xmm5,xmm7
+ pand xmm3,xmm7
+ pxor xmm6,xmm5
+ psllq xmm5,1
+ pxor xmm4,xmm3
+ psllq xmm3,1
+ pxor xmm5,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm1
+ psrlq xmm1,1
+ movdqa xmm10,xmm15
+ psrlq xmm15,1
+ pxor xmm1,xmm2
+ pxor xmm15,xmm0
+ pand xmm1,xmm7
+ pand xmm15,xmm7
+ pxor xmm2,xmm1
+ psllq xmm1,1
+ pxor xmm0,xmm15
+ psllq xmm15,1
+ pxor xmm1,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[32+r11]
+ movdqa xmm9,xmm4
+ psrlq xmm4,2
+ movdqa xmm10,xmm3
+ psrlq xmm3,2
+ pxor xmm4,xmm6
+ pxor xmm3,xmm5
+ pand xmm4,xmm8
+ pand xmm3,xmm8
+ pxor xmm6,xmm4
+ psllq xmm4,2
+ pxor xmm5,xmm3
+ psllq xmm3,2
+ pxor xmm4,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,2
+ movdqa xmm10,xmm15
+ psrlq xmm15,2
+ pxor xmm0,xmm2
+ pxor xmm15,xmm1
+ pand xmm0,xmm8
+ pand xmm15,xmm8
+ pxor xmm2,xmm0
+ psllq xmm0,2
+ pxor xmm1,xmm15
+ psllq xmm15,2
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm9,xmm2
+ psrlq xmm2,4
+ movdqa xmm10,xmm1
+ psrlq xmm1,4
+ pxor xmm2,xmm6
+ pxor xmm1,xmm5
+ pand xmm2,xmm7
+ pand xmm1,xmm7
+ pxor xmm6,xmm2
+ psllq xmm2,4
+ pxor xmm5,xmm1
+ psllq xmm1,4
+ pxor xmm2,xmm9
+ pxor xmm1,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,4
+ movdqa xmm10,xmm15
+ psrlq xmm15,4
+ pxor xmm0,xmm4
+ pxor xmm15,xmm3
+ pand xmm0,xmm7
+ pand xmm15,xmm7
+ pxor xmm4,xmm0
+ psllq xmm0,4
+ pxor xmm3,xmm15
+ psllq xmm15,4
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ dec r10d
+ jmp $L$enc_sbox
+ALIGN 16
+$L$enc_loop::
+ pxor xmm15,XMMWORD PTR[rax]
+ pxor xmm0,XMMWORD PTR[16+rax]
+ pxor xmm1,XMMWORD PTR[32+rax]
+ pxor xmm2,XMMWORD PTR[48+rax]
+DB 102,68,15,56,0,255
+DB 102,15,56,0,199
+ pxor xmm3,XMMWORD PTR[64+rax]
+ pxor xmm4,XMMWORD PTR[80+rax]
+DB 102,15,56,0,207
+DB 102,15,56,0,215
+ pxor xmm5,XMMWORD PTR[96+rax]
+ pxor xmm6,XMMWORD PTR[112+rax]
+DB 102,15,56,0,223
+DB 102,15,56,0,231
+DB 102,15,56,0,239
+DB 102,15,56,0,247
+ lea rax,QWORD PTR[128+rax]
+$L$enc_sbox::
+ pxor xmm4,xmm5
+ pxor xmm1,xmm0
+ pxor xmm2,xmm15
+ pxor xmm5,xmm1
+ pxor xmm4,xmm15
+
+ pxor xmm5,xmm2
+ pxor xmm2,xmm6
+ pxor xmm6,xmm4
+ pxor xmm2,xmm3
+ pxor xmm3,xmm4
+ pxor xmm2,xmm0
+
+ pxor xmm1,xmm6
+ pxor xmm0,xmm4
+ movdqa xmm10,xmm6
+ movdqa xmm9,xmm0
+ movdqa xmm8,xmm4
+ movdqa xmm12,xmm1
+ movdqa xmm11,xmm5
+
+ pxor xmm10,xmm3
+ pxor xmm9,xmm1
+ pxor xmm8,xmm2
+ movdqa xmm13,xmm10
+ pxor xmm12,xmm3
+ movdqa xmm7,xmm9
+ pxor xmm11,xmm15
+ movdqa xmm14,xmm10
+
+ por xmm9,xmm8
+ por xmm10,xmm11
+ pxor xmm14,xmm7
+ pand xmm13,xmm11
+ pxor xmm11,xmm8
+ pand xmm7,xmm8
+ pand xmm14,xmm11
+ movdqa xmm11,xmm2
+ pxor xmm11,xmm15
+ pand xmm12,xmm11
+ pxor xmm10,xmm12
+ pxor xmm9,xmm12
+ movdqa xmm12,xmm6
+ movdqa xmm11,xmm4
+ pxor xmm12,xmm0
+ pxor xmm11,xmm5
+ movdqa xmm8,xmm12
+ pand xmm12,xmm11
+ por xmm8,xmm11
+ pxor xmm7,xmm12
+ pxor xmm10,xmm14
+ pxor xmm9,xmm13
+ pxor xmm8,xmm14
+ movdqa xmm11,xmm1
+ pxor xmm7,xmm13
+ movdqa xmm12,xmm3
+ pxor xmm8,xmm13
+ movdqa xmm13,xmm0
+ pand xmm11,xmm2
+ movdqa xmm14,xmm6
+ pand xmm12,xmm15
+ pand xmm13,xmm4
+ por xmm14,xmm5
+ pxor xmm10,xmm11
+ pxor xmm9,xmm12
+ pxor xmm8,xmm13
+ pxor xmm7,xmm14
+
+
+
+
+
+ movdqa xmm11,xmm10
+ pand xmm10,xmm8
+ pxor xmm11,xmm9
+
+ movdqa xmm13,xmm7
+ movdqa xmm14,xmm11
+ pxor xmm13,xmm10
+ pand xmm14,xmm13
+
+ movdqa xmm12,xmm8
+ pxor xmm14,xmm9
+ pxor xmm12,xmm7
+
+ pxor xmm10,xmm9
+
+ pand xmm12,xmm10
+
+ movdqa xmm9,xmm13
+ pxor xmm12,xmm7
+
+ pxor xmm9,xmm12
+ pxor xmm8,xmm12
+
+ pand xmm9,xmm7
+
+ pxor xmm13,xmm9
+ pxor xmm8,xmm9
+
+ pand xmm13,xmm14
+
+ pxor xmm13,xmm11
+ movdqa xmm11,xmm5
+ movdqa xmm7,xmm4
+ movdqa xmm9,xmm14
+ pxor xmm9,xmm13
+ pand xmm9,xmm5
+ pxor xmm5,xmm4
+ pand xmm4,xmm14
+ pand xmm5,xmm13
+ pxor xmm5,xmm4
+ pxor xmm4,xmm9
+ pxor xmm11,xmm15
+ pxor xmm7,xmm2
+ pxor xmm14,xmm12
+ pxor xmm13,xmm8
+ movdqa xmm10,xmm14
+ movdqa xmm9,xmm12
+ pxor xmm10,xmm13
+ pxor xmm9,xmm8
+ pand xmm10,xmm11
+ pand xmm9,xmm15
+ pxor xmm11,xmm7
+ pxor xmm15,xmm2
+ pand xmm7,xmm14
+ pand xmm2,xmm12
+ pand xmm11,xmm13
+ pand xmm15,xmm8
+ pxor xmm7,xmm11
+ pxor xmm15,xmm2
+ pxor xmm11,xmm10
+ pxor xmm2,xmm9
+ pxor xmm5,xmm11
+ pxor xmm15,xmm11
+ pxor xmm4,xmm7
+ pxor xmm2,xmm7
+
+ movdqa xmm11,xmm6
+ movdqa xmm7,xmm0
+ pxor xmm11,xmm3
+ pxor xmm7,xmm1
+ movdqa xmm10,xmm14
+ movdqa xmm9,xmm12
+ pxor xmm10,xmm13
+ pxor xmm9,xmm8
+ pand xmm10,xmm11
+ pand xmm9,xmm3
+ pxor xmm11,xmm7
+ pxor xmm3,xmm1
+ pand xmm7,xmm14
+ pand xmm1,xmm12
+ pand xmm11,xmm13
+ pand xmm3,xmm8
+ pxor xmm7,xmm11
+ pxor xmm3,xmm1
+ pxor xmm11,xmm10
+ pxor xmm1,xmm9
+ pxor xmm14,xmm12
+ pxor xmm13,xmm8
+ movdqa xmm10,xmm14
+ pxor xmm10,xmm13
+ pand xmm10,xmm6
+ pxor xmm6,xmm0
+ pand xmm0,xmm14
+ pand xmm6,xmm13
+ pxor xmm6,xmm0
+ pxor xmm0,xmm10
+ pxor xmm6,xmm11
+ pxor xmm3,xmm11
+ pxor xmm0,xmm7
+ pxor xmm1,xmm7
+ pxor xmm6,xmm15
+ pxor xmm0,xmm5
+ pxor xmm3,xmm6
+ pxor xmm5,xmm15
+ pxor xmm15,xmm0
+
+ pxor xmm0,xmm4
+ pxor xmm4,xmm1
+ pxor xmm1,xmm2
+ pxor xmm2,xmm4
+ pxor xmm3,xmm4
+
+ pxor xmm5,xmm2
+ dec r10d
+ jl $L$enc_done
+ pshufd xmm7,xmm15,093h
+ pshufd xmm8,xmm0,093h
+ pxor xmm15,xmm7
+ pshufd xmm9,xmm3,093h
+ pxor xmm0,xmm8
+ pshufd xmm10,xmm5,093h
+ pxor xmm3,xmm9
+ pshufd xmm11,xmm2,093h
+ pxor xmm5,xmm10
+ pshufd xmm12,xmm6,093h
+ pxor xmm2,xmm11
+ pshufd xmm13,xmm1,093h
+ pxor xmm6,xmm12
+ pshufd xmm14,xmm4,093h
+ pxor xmm1,xmm13
+ pxor xmm4,xmm14
+
+ pxor xmm8,xmm15
+ pxor xmm7,xmm4
+ pxor xmm8,xmm4
+ pshufd xmm15,xmm15,04Eh
+ pxor xmm9,xmm0
+ pshufd xmm0,xmm0,04Eh
+ pxor xmm12,xmm2
+ pxor xmm15,xmm7
+ pxor xmm13,xmm6
+ pxor xmm0,xmm8
+ pxor xmm11,xmm5
+ pshufd xmm7,xmm2,04Eh
+ pxor xmm14,xmm1
+ pshufd xmm8,xmm6,04Eh
+ pxor xmm10,xmm3
+ pshufd xmm2,xmm5,04Eh
+ pxor xmm10,xmm4
+ pshufd xmm6,xmm4,04Eh
+ pxor xmm11,xmm4
+ pshufd xmm5,xmm1,04Eh
+ pxor xmm7,xmm11
+ pshufd xmm1,xmm3,04Eh
+ pxor xmm8,xmm12
+ pxor xmm2,xmm10
+ pxor xmm6,xmm14
+ pxor xmm5,xmm13
+ movdqa xmm3,xmm7
+ pxor xmm1,xmm9
+ movdqa xmm4,xmm8
+ movdqa xmm7,XMMWORD PTR[48+r11]
+ jnz $L$enc_loop
+ movdqa xmm7,XMMWORD PTR[64+r11]
+ jmp $L$enc_loop
+ALIGN 16
+$L$enc_done::
+ movdqa xmm7,XMMWORD PTR[r11]
+ movdqa xmm8,XMMWORD PTR[16+r11]
+ movdqa xmm9,xmm1
+ psrlq xmm1,1
+ movdqa xmm10,xmm2
+ psrlq xmm2,1
+ pxor xmm1,xmm4
+ pxor xmm2,xmm6
+ pand xmm1,xmm7
+ pand xmm2,xmm7
+ pxor xmm4,xmm1
+ psllq xmm1,1
+ pxor xmm6,xmm2
+ psllq xmm2,1
+ pxor xmm1,xmm9
+ pxor xmm2,xmm10
+ movdqa xmm9,xmm3
+ psrlq xmm3,1
+ movdqa xmm10,xmm15
+ psrlq xmm15,1
+ pxor xmm3,xmm5
+ pxor xmm15,xmm0
+ pand xmm3,xmm7
+ pand xmm15,xmm7
+ pxor xmm5,xmm3
+ psllq xmm3,1
+ pxor xmm0,xmm15
+ psllq xmm15,1
+ pxor xmm3,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[32+r11]
+ movdqa xmm9,xmm6
+ psrlq xmm6,2
+ movdqa xmm10,xmm2
+ psrlq xmm2,2
+ pxor xmm6,xmm4
+ pxor xmm2,xmm1
+ pand xmm6,xmm8
+ pand xmm2,xmm8
+ pxor xmm4,xmm6
+ psllq xmm6,2
+ pxor xmm1,xmm2
+ psllq xmm2,2
+ pxor xmm6,xmm9
+ pxor xmm2,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,2
+ movdqa xmm10,xmm15
+ psrlq xmm15,2
+ pxor xmm0,xmm5
+ pxor xmm15,xmm3
+ pand xmm0,xmm8
+ pand xmm15,xmm8
+ pxor xmm5,xmm0
+ psllq xmm0,2
+ pxor xmm3,xmm15
+ psllq xmm15,2
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm9,xmm5
+ psrlq xmm5,4
+ movdqa xmm10,xmm3
+ psrlq xmm3,4
+ pxor xmm5,xmm4
+ pxor xmm3,xmm1
+ pand xmm5,xmm7
+ pand xmm3,xmm7
+ pxor xmm4,xmm5
+ psllq xmm5,4
+ pxor xmm1,xmm3
+ psllq xmm3,4
+ pxor xmm5,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,4
+ movdqa xmm10,xmm15
+ psrlq xmm15,4
+ pxor xmm0,xmm6
+ pxor xmm15,xmm2
+ pand xmm0,xmm7
+ pand xmm15,xmm7
+ pxor xmm6,xmm0
+ psllq xmm0,4
+ pxor xmm2,xmm15
+ psllq xmm15,4
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[rax]
+ pxor xmm3,xmm7
+ pxor xmm5,xmm7
+ pxor xmm2,xmm7
+ pxor xmm6,xmm7
+ pxor xmm1,xmm7
+ pxor xmm4,xmm7
+ pxor xmm15,xmm7
+ pxor xmm0,xmm7
+ DB 0F3h,0C3h ;repret
+_bsaes_encrypt8 ENDP
+
+
+ALIGN 64
+_bsaes_decrypt8 PROC PRIVATE
+ lea r11,QWORD PTR[$L$BS0]
+
+ movdqa xmm8,XMMWORD PTR[rax]
+ lea rax,QWORD PTR[16+rax]
+ movdqa xmm7,XMMWORD PTR[((-48))+r11]
+ pxor xmm15,xmm8
+ pxor xmm0,xmm8
+ pxor xmm1,xmm8
+ pxor xmm2,xmm8
+DB 102,68,15,56,0,255
+DB 102,15,56,0,199
+ pxor xmm3,xmm8
+ pxor xmm4,xmm8
+DB 102,15,56,0,207
+DB 102,15,56,0,215
+ pxor xmm5,xmm8
+ pxor xmm6,xmm8
+DB 102,15,56,0,223
+DB 102,15,56,0,231
+DB 102,15,56,0,239
+DB 102,15,56,0,247
+ movdqa xmm7,XMMWORD PTR[r11]
+ movdqa xmm8,XMMWORD PTR[16+r11]
+ movdqa xmm9,xmm5
+ psrlq xmm5,1
+ movdqa xmm10,xmm3
+ psrlq xmm3,1
+ pxor xmm5,xmm6
+ pxor xmm3,xmm4
+ pand xmm5,xmm7
+ pand xmm3,xmm7
+ pxor xmm6,xmm5
+ psllq xmm5,1
+ pxor xmm4,xmm3
+ psllq xmm3,1
+ pxor xmm5,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm1
+ psrlq xmm1,1
+ movdqa xmm10,xmm15
+ psrlq xmm15,1
+ pxor xmm1,xmm2
+ pxor xmm15,xmm0
+ pand xmm1,xmm7
+ pand xmm15,xmm7
+ pxor xmm2,xmm1
+ psllq xmm1,1
+ pxor xmm0,xmm15
+ psllq xmm15,1
+ pxor xmm1,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[32+r11]
+ movdqa xmm9,xmm4
+ psrlq xmm4,2
+ movdqa xmm10,xmm3
+ psrlq xmm3,2
+ pxor xmm4,xmm6
+ pxor xmm3,xmm5
+ pand xmm4,xmm8
+ pand xmm3,xmm8
+ pxor xmm6,xmm4
+ psllq xmm4,2
+ pxor xmm5,xmm3
+ psllq xmm3,2
+ pxor xmm4,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,2
+ movdqa xmm10,xmm15
+ psrlq xmm15,2
+ pxor xmm0,xmm2
+ pxor xmm15,xmm1
+ pand xmm0,xmm8
+ pand xmm15,xmm8
+ pxor xmm2,xmm0
+ psllq xmm0,2
+ pxor xmm1,xmm15
+ psllq xmm15,2
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm9,xmm2
+ psrlq xmm2,4
+ movdqa xmm10,xmm1
+ psrlq xmm1,4
+ pxor xmm2,xmm6
+ pxor xmm1,xmm5
+ pand xmm2,xmm7
+ pand xmm1,xmm7
+ pxor xmm6,xmm2
+ psllq xmm2,4
+ pxor xmm5,xmm1
+ psllq xmm1,4
+ pxor xmm2,xmm9
+ pxor xmm1,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,4
+ movdqa xmm10,xmm15
+ psrlq xmm15,4
+ pxor xmm0,xmm4
+ pxor xmm15,xmm3
+ pand xmm0,xmm7
+ pand xmm15,xmm7
+ pxor xmm4,xmm0
+ psllq xmm0,4
+ pxor xmm3,xmm15
+ psllq xmm15,4
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ dec r10d
+ jmp $L$dec_sbox
+ALIGN 16
+$L$dec_loop::
+ pxor xmm15,XMMWORD PTR[rax]
+ pxor xmm0,XMMWORD PTR[16+rax]
+ pxor xmm1,XMMWORD PTR[32+rax]
+ pxor xmm2,XMMWORD PTR[48+rax]
+DB 102,68,15,56,0,255
+DB 102,15,56,0,199
+ pxor xmm3,XMMWORD PTR[64+rax]
+ pxor xmm4,XMMWORD PTR[80+rax]
+DB 102,15,56,0,207
+DB 102,15,56,0,215
+ pxor xmm5,XMMWORD PTR[96+rax]
+ pxor xmm6,XMMWORD PTR[112+rax]
+DB 102,15,56,0,223
+DB 102,15,56,0,231
+DB 102,15,56,0,239
+DB 102,15,56,0,247
+ lea rax,QWORD PTR[128+rax]
+$L$dec_sbox::
+ pxor xmm2,xmm3
+
+ pxor xmm3,xmm6
+ pxor xmm1,xmm6
+ pxor xmm5,xmm3
+ pxor xmm6,xmm5
+ pxor xmm0,xmm6
+
+ pxor xmm15,xmm0
+ pxor xmm1,xmm4
+ pxor xmm2,xmm15
+ pxor xmm4,xmm15
+ pxor xmm0,xmm2
+ movdqa xmm10,xmm2
+ movdqa xmm9,xmm6
+ movdqa xmm8,xmm0
+ movdqa xmm12,xmm3
+ movdqa xmm11,xmm4
+
+ pxor xmm10,xmm15
+ pxor xmm9,xmm3
+ pxor xmm8,xmm5
+ movdqa xmm13,xmm10
+ pxor xmm12,xmm15
+ movdqa xmm7,xmm9
+ pxor xmm11,xmm1
+ movdqa xmm14,xmm10
+
+ por xmm9,xmm8
+ por xmm10,xmm11
+ pxor xmm14,xmm7
+ pand xmm13,xmm11
+ pxor xmm11,xmm8
+ pand xmm7,xmm8
+ pand xmm14,xmm11
+ movdqa xmm11,xmm5
+ pxor xmm11,xmm1
+ pand xmm12,xmm11
+ pxor xmm10,xmm12
+ pxor xmm9,xmm12
+ movdqa xmm12,xmm2
+ movdqa xmm11,xmm0
+ pxor xmm12,xmm6
+ pxor xmm11,xmm4
+ movdqa xmm8,xmm12
+ pand xmm12,xmm11
+ por xmm8,xmm11
+ pxor xmm7,xmm12
+ pxor xmm10,xmm14
+ pxor xmm9,xmm13
+ pxor xmm8,xmm14
+ movdqa xmm11,xmm3
+ pxor xmm7,xmm13
+ movdqa xmm12,xmm15
+ pxor xmm8,xmm13
+ movdqa xmm13,xmm6
+ pand xmm11,xmm5
+ movdqa xmm14,xmm2
+ pand xmm12,xmm1
+ pand xmm13,xmm0
+ por xmm14,xmm4
+ pxor xmm10,xmm11
+ pxor xmm9,xmm12
+ pxor xmm8,xmm13
+ pxor xmm7,xmm14
+
+
+
+
+
+ movdqa xmm11,xmm10
+ pand xmm10,xmm8
+ pxor xmm11,xmm9
+
+ movdqa xmm13,xmm7
+ movdqa xmm14,xmm11
+ pxor xmm13,xmm10
+ pand xmm14,xmm13
+
+ movdqa xmm12,xmm8
+ pxor xmm14,xmm9
+ pxor xmm12,xmm7
+
+ pxor xmm10,xmm9
+
+ pand xmm12,xmm10
+
+ movdqa xmm9,xmm13
+ pxor xmm12,xmm7
+
+ pxor xmm9,xmm12
+ pxor xmm8,xmm12
+
+ pand xmm9,xmm7
+
+ pxor xmm13,xmm9
+ pxor xmm8,xmm9
+
+ pand xmm13,xmm14
+
+ pxor xmm13,xmm11
+ movdqa xmm11,xmm4
+ movdqa xmm7,xmm0
+ movdqa xmm9,xmm14
+ pxor xmm9,xmm13
+ pand xmm9,xmm4
+ pxor xmm4,xmm0
+ pand xmm0,xmm14
+ pand xmm4,xmm13
+ pxor xmm4,xmm0
+ pxor xmm0,xmm9
+ pxor xmm11,xmm1
+ pxor xmm7,xmm5
+ pxor xmm14,xmm12
+ pxor xmm13,xmm8
+ movdqa xmm10,xmm14
+ movdqa xmm9,xmm12
+ pxor xmm10,xmm13
+ pxor xmm9,xmm8
+ pand xmm10,xmm11
+ pand xmm9,xmm1
+ pxor xmm11,xmm7
+ pxor xmm1,xmm5
+ pand xmm7,xmm14
+ pand xmm5,xmm12
+ pand xmm11,xmm13
+ pand xmm1,xmm8
+ pxor xmm7,xmm11
+ pxor xmm1,xmm5
+ pxor xmm11,xmm10
+ pxor xmm5,xmm9
+ pxor xmm4,xmm11
+ pxor xmm1,xmm11
+ pxor xmm0,xmm7
+ pxor xmm5,xmm7
+
+ movdqa xmm11,xmm2
+ movdqa xmm7,xmm6
+ pxor xmm11,xmm15
+ pxor xmm7,xmm3
+ movdqa xmm10,xmm14
+ movdqa xmm9,xmm12
+ pxor xmm10,xmm13
+ pxor xmm9,xmm8
+ pand xmm10,xmm11
+ pand xmm9,xmm15
+ pxor xmm11,xmm7
+ pxor xmm15,xmm3
+ pand xmm7,xmm14
+ pand xmm3,xmm12
+ pand xmm11,xmm13
+ pand xmm15,xmm8
+ pxor xmm7,xmm11
+ pxor xmm15,xmm3
+ pxor xmm11,xmm10
+ pxor xmm3,xmm9
+ pxor xmm14,xmm12
+ pxor xmm13,xmm8
+ movdqa xmm10,xmm14
+ pxor xmm10,xmm13
+ pand xmm10,xmm2
+ pxor xmm2,xmm6
+ pand xmm6,xmm14
+ pand xmm2,xmm13
+ pxor xmm2,xmm6
+ pxor xmm6,xmm10
+ pxor xmm2,xmm11
+ pxor xmm15,xmm11
+ pxor xmm6,xmm7
+ pxor xmm3,xmm7
+ pxor xmm0,xmm6
+ pxor xmm5,xmm4
+
+ pxor xmm3,xmm0
+ pxor xmm1,xmm6
+ pxor xmm4,xmm6
+ pxor xmm3,xmm1
+ pxor xmm6,xmm15
+ pxor xmm3,xmm4
+ pxor xmm2,xmm5
+ pxor xmm5,xmm0
+ pxor xmm2,xmm3
+
+ pxor xmm3,xmm15
+ pxor xmm6,xmm2
+ dec r10d
+ jl $L$dec_done
+
+ pshufd xmm7,xmm15,04Eh
+ pshufd xmm13,xmm2,04Eh
+ pxor xmm7,xmm15
+ pshufd xmm14,xmm4,04Eh
+ pxor xmm13,xmm2
+ pshufd xmm8,xmm0,04Eh
+ pxor xmm14,xmm4
+ pshufd xmm9,xmm5,04Eh
+ pxor xmm8,xmm0
+ pshufd xmm10,xmm3,04Eh
+ pxor xmm9,xmm5
+ pxor xmm15,xmm13
+ pxor xmm0,xmm13
+ pshufd xmm11,xmm1,04Eh
+ pxor xmm10,xmm3
+ pxor xmm5,xmm7
+ pxor xmm3,xmm8
+ pshufd xmm12,xmm6,04Eh
+ pxor xmm11,xmm1
+ pxor xmm0,xmm14
+ pxor xmm1,xmm9
+ pxor xmm12,xmm6
+
+ pxor xmm5,xmm14
+ pxor xmm3,xmm13
+ pxor xmm1,xmm13
+ pxor xmm6,xmm10
+ pxor xmm2,xmm11
+ pxor xmm1,xmm14
+ pxor xmm6,xmm14
+ pxor xmm4,xmm12
+ pshufd xmm7,xmm15,093h
+ pshufd xmm8,xmm0,093h
+ pxor xmm15,xmm7
+ pshufd xmm9,xmm5,093h
+ pxor xmm0,xmm8
+ pshufd xmm10,xmm3,093h
+ pxor xmm5,xmm9
+ pshufd xmm11,xmm1,093h
+ pxor xmm3,xmm10
+ pshufd xmm12,xmm6,093h
+ pxor xmm1,xmm11
+ pshufd xmm13,xmm2,093h
+ pxor xmm6,xmm12
+ pshufd xmm14,xmm4,093h
+ pxor xmm2,xmm13
+ pxor xmm4,xmm14
+
+ pxor xmm8,xmm15
+ pxor xmm7,xmm4
+ pxor xmm8,xmm4
+ pshufd xmm15,xmm15,04Eh
+ pxor xmm9,xmm0
+ pshufd xmm0,xmm0,04Eh
+ pxor xmm12,xmm1
+ pxor xmm15,xmm7
+ pxor xmm13,xmm6
+ pxor xmm0,xmm8
+ pxor xmm11,xmm3
+ pshufd xmm7,xmm1,04Eh
+ pxor xmm14,xmm2
+ pshufd xmm8,xmm6,04Eh
+ pxor xmm10,xmm5
+ pshufd xmm1,xmm3,04Eh
+ pxor xmm10,xmm4
+ pshufd xmm6,xmm4,04Eh
+ pxor xmm11,xmm4
+ pshufd xmm3,xmm2,04Eh
+ pxor xmm7,xmm11
+ pshufd xmm2,xmm5,04Eh
+ pxor xmm8,xmm12
+ pxor xmm10,xmm1
+ pxor xmm6,xmm14
+ pxor xmm13,xmm3
+ movdqa xmm3,xmm7
+ pxor xmm2,xmm9
+ movdqa xmm5,xmm13
+ movdqa xmm4,xmm8
+ movdqa xmm1,xmm2
+ movdqa xmm2,xmm10
+ movdqa xmm7,XMMWORD PTR[((-16))+r11]
+ jnz $L$dec_loop
+ movdqa xmm7,XMMWORD PTR[((-32))+r11]
+ jmp $L$dec_loop
+ALIGN 16
+$L$dec_done::
+ movdqa xmm7,XMMWORD PTR[r11]
+ movdqa xmm8,XMMWORD PTR[16+r11]
+ movdqa xmm9,xmm2
+ psrlq xmm2,1
+ movdqa xmm10,xmm1
+ psrlq xmm1,1
+ pxor xmm2,xmm4
+ pxor xmm1,xmm6
+ pand xmm2,xmm7
+ pand xmm1,xmm7
+ pxor xmm4,xmm2
+ psllq xmm2,1
+ pxor xmm6,xmm1
+ psllq xmm1,1
+ pxor xmm2,xmm9
+ pxor xmm1,xmm10
+ movdqa xmm9,xmm5
+ psrlq xmm5,1
+ movdqa xmm10,xmm15
+ psrlq xmm15,1
+ pxor xmm5,xmm3
+ pxor xmm15,xmm0
+ pand xmm5,xmm7
+ pand xmm15,xmm7
+ pxor xmm3,xmm5
+ psllq xmm5,1
+ pxor xmm0,xmm15
+ psllq xmm15,1
+ pxor xmm5,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[32+r11]
+ movdqa xmm9,xmm6
+ psrlq xmm6,2
+ movdqa xmm10,xmm1
+ psrlq xmm1,2
+ pxor xmm6,xmm4
+ pxor xmm1,xmm2
+ pand xmm6,xmm8
+ pand xmm1,xmm8
+ pxor xmm4,xmm6
+ psllq xmm6,2
+ pxor xmm2,xmm1
+ psllq xmm1,2
+ pxor xmm6,xmm9
+ pxor xmm1,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,2
+ movdqa xmm10,xmm15
+ psrlq xmm15,2
+ pxor xmm0,xmm3
+ pxor xmm15,xmm5
+ pand xmm0,xmm8
+ pand xmm15,xmm8
+ pxor xmm3,xmm0
+ psllq xmm0,2
+ pxor xmm5,xmm15
+ psllq xmm15,2
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm9,xmm3
+ psrlq xmm3,4
+ movdqa xmm10,xmm5
+ psrlq xmm5,4
+ pxor xmm3,xmm4
+ pxor xmm5,xmm2
+ pand xmm3,xmm7
+ pand xmm5,xmm7
+ pxor xmm4,xmm3
+ psllq xmm3,4
+ pxor xmm2,xmm5
+ psllq xmm5,4
+ pxor xmm3,xmm9
+ pxor xmm5,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,4
+ movdqa xmm10,xmm15
+ psrlq xmm15,4
+ pxor xmm0,xmm6
+ pxor xmm15,xmm1
+ pand xmm0,xmm7
+ pand xmm15,xmm7
+ pxor xmm6,xmm0
+ psllq xmm0,4
+ pxor xmm1,xmm15
+ psllq xmm15,4
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[rax]
+ pxor xmm5,xmm7
+ pxor xmm3,xmm7
+ pxor xmm1,xmm7
+ pxor xmm6,xmm7
+ pxor xmm2,xmm7
+ pxor xmm4,xmm7
+ pxor xmm15,xmm7
+ pxor xmm0,xmm7
+ DB 0F3h,0C3h ;repret
+_bsaes_decrypt8 ENDP
+
+ALIGN 16
+_bsaes_key_convert PROC PRIVATE
+ lea r11,QWORD PTR[$L$masks]
+ movdqu xmm7,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ movdqa xmm0,XMMWORD PTR[r11]
+ movdqa xmm1,XMMWORD PTR[16+r11]
+ movdqa xmm2,XMMWORD PTR[32+r11]
+ movdqa xmm3,XMMWORD PTR[48+r11]
+ movdqa xmm4,XMMWORD PTR[64+r11]
+ pcmpeqd xmm5,xmm5
+
+ movdqu xmm6,XMMWORD PTR[rcx]
+ movdqa XMMWORD PTR[rax],xmm7
+ lea rax,QWORD PTR[16+rax]
+ dec r10d
+ jmp $L$key_loop
+ALIGN 16
+$L$key_loop::
+DB 102,15,56,0,244
+
+ movdqa xmm8,xmm0
+ movdqa xmm9,xmm1
+
+ pand xmm8,xmm6
+ pand xmm9,xmm6
+ movdqa xmm10,xmm2
+ pcmpeqb xmm8,xmm0
+ psllq xmm0,4
+ movdqa xmm11,xmm3
+ pcmpeqb xmm9,xmm1
+ psllq xmm1,4
+
+ pand xmm10,xmm6
+ pand xmm11,xmm6
+ movdqa xmm12,xmm0
+ pcmpeqb xmm10,xmm2
+ psllq xmm2,4
+ movdqa xmm13,xmm1
+ pcmpeqb xmm11,xmm3
+ psllq xmm3,4
+
+ movdqa xmm14,xmm2
+ movdqa xmm15,xmm3
+ pxor xmm8,xmm5
+ pxor xmm9,xmm5
+
+ pand xmm12,xmm6
+ pand xmm13,xmm6
+ movdqa XMMWORD PTR[rax],xmm8
+ pcmpeqb xmm12,xmm0
+ psrlq xmm0,4
+ movdqa XMMWORD PTR[16+rax],xmm9
+ pcmpeqb xmm13,xmm1
+ psrlq xmm1,4
+ lea rcx,QWORD PTR[16+rcx]
+
+ pand xmm14,xmm6
+ pand xmm15,xmm6
+ movdqa XMMWORD PTR[32+rax],xmm10
+ pcmpeqb xmm14,xmm2
+ psrlq xmm2,4
+ movdqa XMMWORD PTR[48+rax],xmm11
+ pcmpeqb xmm15,xmm3
+ psrlq xmm3,4
+ movdqu xmm6,XMMWORD PTR[rcx]
+
+ pxor xmm13,xmm5
+ pxor xmm14,xmm5
+ movdqa XMMWORD PTR[64+rax],xmm12
+ movdqa XMMWORD PTR[80+rax],xmm13
+ movdqa XMMWORD PTR[96+rax],xmm14
+ movdqa XMMWORD PTR[112+rax],xmm15
+ lea rax,QWORD PTR[128+rax]
+ dec r10d
+ jnz $L$key_loop
+
+ movdqa xmm7,XMMWORD PTR[80+r11]
+
+ DB 0F3h,0C3h ;repret
+_bsaes_key_convert ENDP
+EXTERN asm_AES_cbc_encrypt:NEAR
+PUBLIC bsaes_cbc_encrypt
+
+ALIGN 16
+bsaes_cbc_encrypt PROC PUBLIC
+ mov r11d,DWORD PTR[48+rsp]
+ cmp r11d,0
+ jne asm_AES_cbc_encrypt
+ cmp r8,128
+ jb asm_AES_cbc_encrypt
+
+ mov rax,rsp
+$L$cbc_dec_prologue::
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-72))+rsp]
+ mov r10,QWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[((-160))+rsp]
+ movaps XMMWORD PTR[64+rsp],xmm6
+ movaps XMMWORD PTR[80+rsp],xmm7
+ movaps XMMWORD PTR[96+rsp],xmm8
+ movaps XMMWORD PTR[112+rsp],xmm9
+ movaps XMMWORD PTR[128+rsp],xmm10
+ movaps XMMWORD PTR[144+rsp],xmm11
+ movaps XMMWORD PTR[160+rsp],xmm12
+ movaps XMMWORD PTR[176+rsp],xmm13
+ movaps XMMWORD PTR[192+rsp],xmm14
+ movaps XMMWORD PTR[208+rsp],xmm15
+$L$cbc_dec_body::
+ mov rbp,rsp
+ mov eax,DWORD PTR[240+r9]
+ mov r12,rcx
+ mov r13,rdx
+ mov r14,r8
+ mov r15,r9
+ mov rbx,r10
+ shr r14,4
+
+ mov edx,eax
+ shl rax,7
+ sub rax,96
+ sub rsp,rax
+
+ mov rax,rsp
+ mov rcx,r15
+ mov r10d,edx
+ call _bsaes_key_convert
+ pxor xmm7,XMMWORD PTR[rsp]
+ movdqa XMMWORD PTR[rax],xmm6
+ movdqa XMMWORD PTR[rsp],xmm7
+
+ movdqu xmm14,XMMWORD PTR[rbx]
+ sub r14,8
+$L$cbc_dec_loop::
+ movdqu xmm15,XMMWORD PTR[r12]
+ movdqu xmm0,XMMWORD PTR[16+r12]
+ movdqu xmm1,XMMWORD PTR[32+r12]
+ movdqu xmm2,XMMWORD PTR[48+r12]
+ movdqu xmm3,XMMWORD PTR[64+r12]
+ movdqu xmm4,XMMWORD PTR[80+r12]
+ mov rax,rsp
+ movdqu xmm5,XMMWORD PTR[96+r12]
+ mov r10d,edx
+ movdqu xmm6,XMMWORD PTR[112+r12]
+ movdqa XMMWORD PTR[32+rbp],xmm14
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm1,xmm10
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ pxor xmm6,xmm11
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm2,xmm12
+ movdqu xmm14,XMMWORD PTR[112+r12]
+ pxor xmm4,xmm13
+ movdqu XMMWORD PTR[r13],xmm15
+ lea r12,QWORD PTR[128+r12]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ movdqu XMMWORD PTR[80+r13],xmm6
+ movdqu XMMWORD PTR[96+r13],xmm2
+ movdqu XMMWORD PTR[112+r13],xmm4
+ lea r13,QWORD PTR[128+r13]
+ sub r14,8
+ jnc $L$cbc_dec_loop
+
+ add r14,8
+ jz $L$cbc_dec_done
+
+ movdqu xmm15,XMMWORD PTR[r12]
+ mov rax,rsp
+ mov r10d,edx
+ cmp r14,2
+ jb $L$cbc_dec_one
+ movdqu xmm0,XMMWORD PTR[16+r12]
+ je $L$cbc_dec_two
+ movdqu xmm1,XMMWORD PTR[32+r12]
+ cmp r14,4
+ jb $L$cbc_dec_three
+ movdqu xmm2,XMMWORD PTR[48+r12]
+ je $L$cbc_dec_four
+ movdqu xmm3,XMMWORD PTR[64+r12]
+ cmp r14,6
+ jb $L$cbc_dec_five
+ movdqu xmm4,XMMWORD PTR[80+r12]
+ je $L$cbc_dec_six
+ movdqu xmm5,XMMWORD PTR[96+r12]
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm1,xmm10
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ pxor xmm6,xmm11
+ movdqu xmm14,XMMWORD PTR[96+r12]
+ pxor xmm2,xmm12
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ movdqu XMMWORD PTR[80+r13],xmm6
+ movdqu XMMWORD PTR[96+r13],xmm2
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_six::
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm1,xmm10
+ movdqu xmm14,XMMWORD PTR[80+r12]
+ pxor xmm6,xmm11
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ movdqu XMMWORD PTR[80+r13],xmm6
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_five::
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu xmm14,XMMWORD PTR[64+r12]
+ pxor xmm1,xmm10
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_four::
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm14,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_three::
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm14,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_two::
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm14,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_one::
+ lea rcx,QWORD PTR[r12]
+ lea rdx,QWORD PTR[32+rbp]
+ lea r8,QWORD PTR[r15]
+ call asm_AES_decrypt
+ pxor xmm14,XMMWORD PTR[32+rbp]
+ movdqu XMMWORD PTR[r13],xmm14
+ movdqa xmm14,xmm15
+
+$L$cbc_dec_done::
+ movdqu XMMWORD PTR[rbx],xmm14
+ lea rax,QWORD PTR[rsp]
+ pxor xmm0,xmm0
+$L$cbc_dec_bzero::
+ movdqa XMMWORD PTR[rax],xmm0
+ movdqa XMMWORD PTR[16+rax],xmm0
+ lea rax,QWORD PTR[32+rax]
+ cmp rbp,rax
+ ja $L$cbc_dec_bzero
+
+ lea rsp,QWORD PTR[rbp]
+ movaps xmm6,XMMWORD PTR[64+rbp]
+ movaps xmm7,XMMWORD PTR[80+rbp]
+ movaps xmm8,XMMWORD PTR[96+rbp]
+ movaps xmm9,XMMWORD PTR[112+rbp]
+ movaps xmm10,XMMWORD PTR[128+rbp]
+ movaps xmm11,XMMWORD PTR[144+rbp]
+ movaps xmm12,XMMWORD PTR[160+rbp]
+ movaps xmm13,XMMWORD PTR[176+rbp]
+ movaps xmm14,XMMWORD PTR[192+rbp]
+ movaps xmm15,XMMWORD PTR[208+rbp]
+ lea rsp,QWORD PTR[160+rbp]
+ mov r15,QWORD PTR[72+rsp]
+ mov r14,QWORD PTR[80+rsp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r12,QWORD PTR[96+rsp]
+ mov rbx,QWORD PTR[104+rsp]
+ mov rax,QWORD PTR[112+rsp]
+ lea rsp,QWORD PTR[120+rsp]
+ mov rbp,rax
+$L$cbc_dec_epilogue::
+ DB 0F3h,0C3h ;repret
+bsaes_cbc_encrypt ENDP
+
+PUBLIC bsaes_ctr32_encrypt_blocks
+
+ALIGN 16
+bsaes_ctr32_encrypt_blocks PROC PUBLIC
+ mov rax,rsp
+$L$ctr_enc_prologue::
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-72))+rsp]
+ mov r10,QWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[((-160))+rsp]
+ movaps XMMWORD PTR[64+rsp],xmm6
+ movaps XMMWORD PTR[80+rsp],xmm7
+ movaps XMMWORD PTR[96+rsp],xmm8
+ movaps XMMWORD PTR[112+rsp],xmm9
+ movaps XMMWORD PTR[128+rsp],xmm10
+ movaps XMMWORD PTR[144+rsp],xmm11
+ movaps XMMWORD PTR[160+rsp],xmm12
+ movaps XMMWORD PTR[176+rsp],xmm13
+ movaps XMMWORD PTR[192+rsp],xmm14
+ movaps XMMWORD PTR[208+rsp],xmm15
+$L$ctr_enc_body::
+ mov rbp,rsp
+ movdqu xmm0,XMMWORD PTR[r10]
+ mov eax,DWORD PTR[240+r9]
+ mov r12,rcx
+ mov r13,rdx
+ mov r14,r8
+ mov r15,r9
+ movdqa XMMWORD PTR[32+rbp],xmm0
+ cmp r8,8
+ jb $L$ctr_enc_short
+
+ mov ebx,eax
+ shl rax,7
+ sub rax,96
+ sub rsp,rax
+
+ mov rax,rsp
+ mov rcx,r15
+ mov r10d,ebx
+ call _bsaes_key_convert
+ pxor xmm7,xmm6
+ movdqa XMMWORD PTR[rax],xmm7
+
+ movdqa xmm8,XMMWORD PTR[rsp]
+ lea r11,QWORD PTR[$L$ADD1]
+ movdqa xmm15,XMMWORD PTR[32+rbp]
+ movdqa xmm7,XMMWORD PTR[((-32))+r11]
+DB 102,68,15,56,0,199
+DB 102,68,15,56,0,255
+ movdqa XMMWORD PTR[rsp],xmm8
+ jmp $L$ctr_enc_loop
+ALIGN 16
+$L$ctr_enc_loop::
+ movdqa XMMWORD PTR[32+rbp],xmm15
+ movdqa xmm0,xmm15
+ movdqa xmm1,xmm15
+ paddd xmm0,XMMWORD PTR[r11]
+ movdqa xmm2,xmm15
+ paddd xmm1,XMMWORD PTR[16+r11]
+ movdqa xmm3,xmm15
+ paddd xmm2,XMMWORD PTR[32+r11]
+ movdqa xmm4,xmm15
+ paddd xmm3,XMMWORD PTR[48+r11]
+ movdqa xmm5,xmm15
+ paddd xmm4,XMMWORD PTR[64+r11]
+ movdqa xmm6,xmm15
+ paddd xmm5,XMMWORD PTR[80+r11]
+ paddd xmm6,XMMWORD PTR[96+r11]
+
+
+
+ movdqa xmm8,XMMWORD PTR[rsp]
+ lea rax,QWORD PTR[16+rsp]
+ movdqa xmm7,XMMWORD PTR[((-16))+r11]
+ pxor xmm15,xmm8
+ pxor xmm0,xmm8
+ pxor xmm1,xmm8
+ pxor xmm2,xmm8
+DB 102,68,15,56,0,255
+DB 102,15,56,0,199
+ pxor xmm3,xmm8
+ pxor xmm4,xmm8
+DB 102,15,56,0,207
+DB 102,15,56,0,215
+ pxor xmm5,xmm8
+ pxor xmm6,xmm8
+DB 102,15,56,0,223
+DB 102,15,56,0,231
+DB 102,15,56,0,239
+DB 102,15,56,0,247
+ lea r11,QWORD PTR[$L$BS0]
+ mov r10d,ebx
+
+ call _bsaes_encrypt8_bitslice
+
+ sub r14,8
+ jc $L$ctr_enc_loop_done
+
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ movdqu xmm14,XMMWORD PTR[112+r12]
+ lea r12,QWORD PTR[128+r12]
+ pxor xmm7,xmm15
+ movdqa xmm15,XMMWORD PTR[32+rbp]
+ pxor xmm0,xmm8
+ movdqu XMMWORD PTR[r13],xmm7
+ pxor xmm3,xmm9
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,xmm10
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,xmm11
+ movdqu XMMWORD PTR[48+r13],xmm5
+ pxor xmm6,xmm12
+ movdqu XMMWORD PTR[64+r13],xmm2
+ pxor xmm1,xmm13
+ movdqu XMMWORD PTR[80+r13],xmm6
+ pxor xmm4,xmm14
+ movdqu XMMWORD PTR[96+r13],xmm1
+ lea r11,QWORD PTR[$L$ADD1]
+ movdqu XMMWORD PTR[112+r13],xmm4
+ lea r13,QWORD PTR[128+r13]
+ paddd xmm15,XMMWORD PTR[112+r11]
+ jnz $L$ctr_enc_loop
+
+ jmp $L$ctr_enc_done
+ALIGN 16
+$L$ctr_enc_loop_done::
+ add r14,8
+ movdqu xmm7,XMMWORD PTR[r12]
+ pxor xmm15,xmm7
+ movdqu XMMWORD PTR[r13],xmm15
+ cmp r14,2
+ jb $L$ctr_enc_done
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm8
+ movdqu XMMWORD PTR[16+r13],xmm0
+ je $L$ctr_enc_done
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm3,xmm9
+ movdqu XMMWORD PTR[32+r13],xmm3
+ cmp r14,4
+ jb $L$ctr_enc_done
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm5,xmm10
+ movdqu XMMWORD PTR[48+r13],xmm5
+ je $L$ctr_enc_done
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm2,xmm11
+ movdqu XMMWORD PTR[64+r13],xmm2
+ cmp r14,6
+ jb $L$ctr_enc_done
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ pxor xmm6,xmm12
+ movdqu XMMWORD PTR[80+r13],xmm6
+ je $L$ctr_enc_done
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm1,xmm13
+ movdqu XMMWORD PTR[96+r13],xmm1
+ jmp $L$ctr_enc_done
+
+ALIGN 16
+$L$ctr_enc_short::
+ lea rcx,QWORD PTR[32+rbp]
+ lea rdx,QWORD PTR[48+rbp]
+ lea r8,QWORD PTR[r15]
+ call asm_AES_encrypt
+ movdqu xmm0,XMMWORD PTR[r12]
+ lea r12,QWORD PTR[16+r12]
+ mov eax,DWORD PTR[44+rbp]
+ bswap eax
+ pxor xmm0,XMMWORD PTR[48+rbp]
+ inc eax
+ movdqu XMMWORD PTR[r13],xmm0
+ bswap eax
+ lea r13,QWORD PTR[16+r13]
+ mov DWORD PTR[44+rsp],eax
+ dec r14
+ jnz $L$ctr_enc_short
+
+$L$ctr_enc_done::
+ lea rax,QWORD PTR[rsp]
+ pxor xmm0,xmm0
+$L$ctr_enc_bzero::
+ movdqa XMMWORD PTR[rax],xmm0
+ movdqa XMMWORD PTR[16+rax],xmm0
+ lea rax,QWORD PTR[32+rax]
+ cmp rbp,rax
+ ja $L$ctr_enc_bzero
+
+ lea rsp,QWORD PTR[rbp]
+ movaps xmm6,XMMWORD PTR[64+rbp]
+ movaps xmm7,XMMWORD PTR[80+rbp]
+ movaps xmm8,XMMWORD PTR[96+rbp]
+ movaps xmm9,XMMWORD PTR[112+rbp]
+ movaps xmm10,XMMWORD PTR[128+rbp]
+ movaps xmm11,XMMWORD PTR[144+rbp]
+ movaps xmm12,XMMWORD PTR[160+rbp]
+ movaps xmm13,XMMWORD PTR[176+rbp]
+ movaps xmm14,XMMWORD PTR[192+rbp]
+ movaps xmm15,XMMWORD PTR[208+rbp]
+ lea rsp,QWORD PTR[160+rbp]
+ mov r15,QWORD PTR[72+rsp]
+ mov r14,QWORD PTR[80+rsp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r12,QWORD PTR[96+rsp]
+ mov rbx,QWORD PTR[104+rsp]
+ mov rax,QWORD PTR[112+rsp]
+ lea rsp,QWORD PTR[120+rsp]
+ mov rbp,rax
+$L$ctr_enc_epilogue::
+ DB 0F3h,0C3h ;repret
+bsaes_ctr32_encrypt_blocks ENDP
+PUBLIC bsaes_xts_encrypt
+
+ALIGN 16
+bsaes_xts_encrypt PROC PUBLIC
+ mov rax,rsp
+$L$xts_enc_prologue::
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-72))+rsp]
+ mov r10,QWORD PTR[160+rsp]
+ mov r11,QWORD PTR[168+rsp]
+ lea rsp,QWORD PTR[((-160))+rsp]
+ movaps XMMWORD PTR[64+rsp],xmm6
+ movaps XMMWORD PTR[80+rsp],xmm7
+ movaps XMMWORD PTR[96+rsp],xmm8
+ movaps XMMWORD PTR[112+rsp],xmm9
+ movaps XMMWORD PTR[128+rsp],xmm10
+ movaps XMMWORD PTR[144+rsp],xmm11
+ movaps XMMWORD PTR[160+rsp],xmm12
+ movaps XMMWORD PTR[176+rsp],xmm13
+ movaps XMMWORD PTR[192+rsp],xmm14
+ movaps XMMWORD PTR[208+rsp],xmm15
+$L$xts_enc_body::
+ mov rbp,rsp
+ mov r12,rcx
+ mov r13,rdx
+ mov r14,r8
+ mov r15,r9
+
+ lea rcx,QWORD PTR[r11]
+ lea rdx,QWORD PTR[32+rbp]
+ lea r8,QWORD PTR[r10]
+ call asm_AES_encrypt
+
+ mov eax,DWORD PTR[240+r15]
+ mov rbx,r14
+
+ mov edx,eax
+ shl rax,7
+ sub rax,96
+ sub rsp,rax
+
+ mov rax,rsp
+ mov rcx,r15
+ mov r10d,edx
+ call _bsaes_key_convert
+ pxor xmm7,xmm6
+ movdqa XMMWORD PTR[rax],xmm7
+
+ and r14,-16
+ sub rsp,080h
+ movdqa xmm6,XMMWORD PTR[32+rbp]
+
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic]
+ pcmpgtd xmm14,xmm6
+
+ sub r14,080h
+ jc $L$xts_enc_short
+ jmp $L$xts_enc_loop
+
+ALIGN 16
+$L$xts_enc_loop::
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm15,xmm6
+ movdqa XMMWORD PTR[rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm0,xmm6
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm7,XMMWORD PTR[r12]
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm1,xmm6
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm15,xmm7
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm2,xmm6
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm0,xmm8
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm3,xmm6
+ movdqa XMMWORD PTR[64+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm1,xmm9
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm4,xmm6
+ movdqa XMMWORD PTR[80+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm2,xmm10
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm5,xmm6
+ movdqa XMMWORD PTR[96+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ pxor xmm3,xmm11
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ movdqu xmm14,XMMWORD PTR[112+r12]
+ lea r12,QWORD PTR[128+r12]
+ movdqa XMMWORD PTR[112+rsp],xmm6
+ pxor xmm5,xmm13
+ lea rax,QWORD PTR[128+rsp]
+ pxor xmm6,xmm14
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm5
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm2
+ pxor xmm1,XMMWORD PTR[96+rsp]
+ movdqu XMMWORD PTR[80+r13],xmm6
+ pxor xmm4,XMMWORD PTR[112+rsp]
+ movdqu XMMWORD PTR[96+r13],xmm1
+ movdqu XMMWORD PTR[112+r13],xmm4
+ lea r13,QWORD PTR[128+r13]
+
+ movdqa xmm6,XMMWORD PTR[112+rsp]
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic]
+ pcmpgtd xmm14,xmm6
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+
+ sub r14,080h
+ jnc $L$xts_enc_loop
+
+$L$xts_enc_short::
+ add r14,080h
+ jz $L$xts_enc_done
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm15,xmm6
+ movdqa XMMWORD PTR[rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm0,xmm6
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm7,XMMWORD PTR[r12]
+ cmp r14,16
+ je $L$xts_enc_1
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm1,xmm6
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ cmp r14,32
+ je $L$xts_enc_2
+ pxor xmm15,xmm7
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm2,xmm6
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ cmp r14,48
+ je $L$xts_enc_3
+ pxor xmm0,xmm8
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm3,xmm6
+ movdqa XMMWORD PTR[64+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ cmp r14,64
+ je $L$xts_enc_4
+ pxor xmm1,xmm9
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm4,xmm6
+ movdqa XMMWORD PTR[80+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ cmp r14,80
+ je $L$xts_enc_5
+ pxor xmm2,xmm10
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm5,xmm6
+ movdqa XMMWORD PTR[96+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ cmp r14,96
+ je $L$xts_enc_6
+ pxor xmm3,xmm11
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ movdqa XMMWORD PTR[112+rsp],xmm6
+ lea r12,QWORD PTR[112+r12]
+ pxor xmm5,xmm13
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm5
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm2
+ pxor xmm1,XMMWORD PTR[96+rsp]
+ movdqu XMMWORD PTR[80+r13],xmm6
+ movdqu XMMWORD PTR[96+r13],xmm1
+ lea r13,QWORD PTR[112+r13]
+
+ movdqa xmm6,XMMWORD PTR[112+rsp]
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_6::
+ pxor xmm3,xmm11
+ lea r12,QWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm5
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm2
+ movdqu XMMWORD PTR[80+r13],xmm6
+ lea r13,QWORD PTR[96+r13]
+
+ movdqa xmm6,XMMWORD PTR[96+rsp]
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_5::
+ pxor xmm2,xmm10
+ lea r12,QWORD PTR[80+r12]
+ pxor xmm3,xmm11
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm5
+ movdqu XMMWORD PTR[64+r13],xmm2
+ lea r13,QWORD PTR[80+r13]
+
+ movdqa xmm6,XMMWORD PTR[80+rsp]
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_4::
+ pxor xmm1,xmm9
+ lea r12,QWORD PTR[64+r12]
+ pxor xmm2,xmm10
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ movdqu XMMWORD PTR[48+r13],xmm5
+ lea r13,QWORD PTR[64+r13]
+
+ movdqa xmm6,XMMWORD PTR[64+rsp]
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_3::
+ pxor xmm0,xmm8
+ lea r12,QWORD PTR[48+r12]
+ pxor xmm1,xmm9
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm3
+ lea r13,QWORD PTR[48+r13]
+
+ movdqa xmm6,XMMWORD PTR[48+rsp]
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_2::
+ pxor xmm15,xmm7
+ lea r12,QWORD PTR[32+r12]
+ pxor xmm0,xmm8
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ lea r13,QWORD PTR[32+r13]
+
+ movdqa xmm6,XMMWORD PTR[32+rsp]
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_1::
+ pxor xmm7,xmm15
+ lea r12,QWORD PTR[16+r12]
+ movdqa XMMWORD PTR[32+rbp],xmm7
+ lea rcx,QWORD PTR[32+rbp]
+ lea rdx,QWORD PTR[32+rbp]
+ lea r8,QWORD PTR[r15]
+ call asm_AES_encrypt
+ pxor xmm15,XMMWORD PTR[32+rbp]
+
+
+
+
+
+ movdqu XMMWORD PTR[r13],xmm15
+ lea r13,QWORD PTR[16+r13]
+
+ movdqa xmm6,XMMWORD PTR[16+rsp]
+
+$L$xts_enc_done::
+ and ebx,15
+ jz $L$xts_enc_ret
+ mov rdx,r13
+
+$L$xts_enc_steal::
+ movzx eax,BYTE PTR[r12]
+ movzx ecx,BYTE PTR[((-16))+rdx]
+ lea r12,QWORD PTR[1+r12]
+ mov BYTE PTR[((-16))+rdx],al
+ mov BYTE PTR[rdx],cl
+ lea rdx,QWORD PTR[1+rdx]
+ sub ebx,1
+ jnz $L$xts_enc_steal
+
+ movdqu xmm15,XMMWORD PTR[((-16))+r13]
+ lea rcx,QWORD PTR[32+rbp]
+ pxor xmm15,xmm6
+ lea rdx,QWORD PTR[32+rbp]
+ movdqa XMMWORD PTR[32+rbp],xmm15
+ lea r8,QWORD PTR[r15]
+ call asm_AES_encrypt
+ pxor xmm6,XMMWORD PTR[32+rbp]
+ movdqu XMMWORD PTR[(-16)+r13],xmm6
+
+$L$xts_enc_ret::
+ lea rax,QWORD PTR[rsp]
+ pxor xmm0,xmm0
+$L$xts_enc_bzero::
+ movdqa XMMWORD PTR[rax],xmm0
+ movdqa XMMWORD PTR[16+rax],xmm0
+ lea rax,QWORD PTR[32+rax]
+ cmp rbp,rax
+ ja $L$xts_enc_bzero
+
+ lea rsp,QWORD PTR[rbp]
+ movaps xmm6,XMMWORD PTR[64+rbp]
+ movaps xmm7,XMMWORD PTR[80+rbp]
+ movaps xmm8,XMMWORD PTR[96+rbp]
+ movaps xmm9,XMMWORD PTR[112+rbp]
+ movaps xmm10,XMMWORD PTR[128+rbp]
+ movaps xmm11,XMMWORD PTR[144+rbp]
+ movaps xmm12,XMMWORD PTR[160+rbp]
+ movaps xmm13,XMMWORD PTR[176+rbp]
+ movaps xmm14,XMMWORD PTR[192+rbp]
+ movaps xmm15,XMMWORD PTR[208+rbp]
+ lea rsp,QWORD PTR[160+rbp]
+ mov r15,QWORD PTR[72+rsp]
+ mov r14,QWORD PTR[80+rsp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r12,QWORD PTR[96+rsp]
+ mov rbx,QWORD PTR[104+rsp]
+ mov rax,QWORD PTR[112+rsp]
+ lea rsp,QWORD PTR[120+rsp]
+ mov rbp,rax
+$L$xts_enc_epilogue::
+ DB 0F3h,0C3h ;repret
+bsaes_xts_encrypt ENDP
+
+PUBLIC bsaes_xts_decrypt
+
+ALIGN 16
+bsaes_xts_decrypt PROC PUBLIC
+ mov rax,rsp
+$L$xts_dec_prologue::
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-72))+rsp]
+ mov r10,QWORD PTR[160+rsp]
+ mov r11,QWORD PTR[168+rsp]
+ lea rsp,QWORD PTR[((-160))+rsp]
+ movaps XMMWORD PTR[64+rsp],xmm6
+ movaps XMMWORD PTR[80+rsp],xmm7
+ movaps XMMWORD PTR[96+rsp],xmm8
+ movaps XMMWORD PTR[112+rsp],xmm9
+ movaps XMMWORD PTR[128+rsp],xmm10
+ movaps XMMWORD PTR[144+rsp],xmm11
+ movaps XMMWORD PTR[160+rsp],xmm12
+ movaps XMMWORD PTR[176+rsp],xmm13
+ movaps XMMWORD PTR[192+rsp],xmm14
+ movaps XMMWORD PTR[208+rsp],xmm15
+$L$xts_dec_body::
+ mov rbp,rsp
+ mov r12,rcx
+ mov r13,rdx
+ mov r14,r8
+ mov r15,r9
+
+ lea rcx,QWORD PTR[r11]
+ lea rdx,QWORD PTR[32+rbp]
+ lea r8,QWORD PTR[r10]
+ call asm_AES_encrypt
+
+ mov eax,DWORD PTR[240+r15]
+ mov rbx,r14
+
+ mov edx,eax
+ shl rax,7
+ sub rax,96
+ sub rsp,rax
+
+ mov rax,rsp
+ mov rcx,r15
+ mov r10d,edx
+ call _bsaes_key_convert
+ pxor xmm7,XMMWORD PTR[rsp]
+ movdqa XMMWORD PTR[rax],xmm6
+ movdqa XMMWORD PTR[rsp],xmm7
+
+ xor eax,eax
+ and r14,-16
+ test ebx,15
+ setnz al
+ shl rax,4
+ sub r14,rax
+
+ sub rsp,080h
+ movdqa xmm6,XMMWORD PTR[32+rbp]
+
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic]
+ pcmpgtd xmm14,xmm6
+
+ sub r14,080h
+ jc $L$xts_dec_short
+ jmp $L$xts_dec_loop
+
+ALIGN 16
+$L$xts_dec_loop::
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm15,xmm6
+ movdqa XMMWORD PTR[rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm0,xmm6
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm7,XMMWORD PTR[r12]
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm1,xmm6
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm15,xmm7
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm2,xmm6
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm0,xmm8
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm3,xmm6
+ movdqa XMMWORD PTR[64+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm1,xmm9
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm4,xmm6
+ movdqa XMMWORD PTR[80+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm2,xmm10
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm5,xmm6
+ movdqa XMMWORD PTR[96+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ pxor xmm3,xmm11
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ movdqu xmm14,XMMWORD PTR[112+r12]
+ lea r12,QWORD PTR[128+r12]
+ movdqa XMMWORD PTR[112+rsp],xmm6
+ pxor xmm5,xmm13
+ lea rax,QWORD PTR[128+rsp]
+ pxor xmm6,xmm14
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ pxor xmm1,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm3
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm1
+ pxor xmm2,XMMWORD PTR[96+rsp]
+ movdqu XMMWORD PTR[80+r13],xmm6
+ pxor xmm4,XMMWORD PTR[112+rsp]
+ movdqu XMMWORD PTR[96+r13],xmm2
+ movdqu XMMWORD PTR[112+r13],xmm4
+ lea r13,QWORD PTR[128+r13]
+
+ movdqa xmm6,XMMWORD PTR[112+rsp]
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic]
+ pcmpgtd xmm14,xmm6
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+
+ sub r14,080h
+ jnc $L$xts_dec_loop
+
+$L$xts_dec_short::
+ add r14,080h
+ jz $L$xts_dec_done
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm15,xmm6
+ movdqa XMMWORD PTR[rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm0,xmm6
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm7,XMMWORD PTR[r12]
+ cmp r14,16
+ je $L$xts_dec_1
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm1,xmm6
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ cmp r14,32
+ je $L$xts_dec_2
+ pxor xmm15,xmm7
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm2,xmm6
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ cmp r14,48
+ je $L$xts_dec_3
+ pxor xmm0,xmm8
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm3,xmm6
+ movdqa XMMWORD PTR[64+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ cmp r14,64
+ je $L$xts_dec_4
+ pxor xmm1,xmm9
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm4,xmm6
+ movdqa XMMWORD PTR[80+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ cmp r14,80
+ je $L$xts_dec_5
+ pxor xmm2,xmm10
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm5,xmm6
+ movdqa XMMWORD PTR[96+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ cmp r14,96
+ je $L$xts_dec_6
+ pxor xmm3,xmm11
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ movdqa XMMWORD PTR[112+rsp],xmm6
+ lea r12,QWORD PTR[112+r12]
+ pxor xmm5,xmm13
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ pxor xmm1,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm3
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm1
+ pxor xmm2,XMMWORD PTR[96+rsp]
+ movdqu XMMWORD PTR[80+r13],xmm6
+ movdqu XMMWORD PTR[96+r13],xmm2
+ lea r13,QWORD PTR[112+r13]
+
+ movdqa xmm6,XMMWORD PTR[112+rsp]
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_6::
+ pxor xmm3,xmm11
+ lea r12,QWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ pxor xmm1,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm3
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm1
+ movdqu XMMWORD PTR[80+r13],xmm6
+ lea r13,QWORD PTR[96+r13]
+
+ movdqa xmm6,XMMWORD PTR[96+rsp]
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_5::
+ pxor xmm2,xmm10
+ lea r12,QWORD PTR[80+r12]
+ pxor xmm3,xmm11
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ pxor xmm1,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ lea r13,QWORD PTR[80+r13]
+
+ movdqa xmm6,XMMWORD PTR[80+rsp]
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_4::
+ pxor xmm1,xmm9
+ lea r12,QWORD PTR[64+r12]
+ pxor xmm2,xmm10
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ lea r13,QWORD PTR[64+r13]
+
+ movdqa xmm6,XMMWORD PTR[64+rsp]
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_3::
+ pxor xmm0,xmm8
+ lea r12,QWORD PTR[48+r12]
+ pxor xmm1,xmm9
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ lea r13,QWORD PTR[48+r13]
+
+ movdqa xmm6,XMMWORD PTR[48+rsp]
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_2::
+ pxor xmm15,xmm7
+ lea r12,QWORD PTR[32+r12]
+ pxor xmm0,xmm8
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ lea r13,QWORD PTR[32+r13]
+
+ movdqa xmm6,XMMWORD PTR[32+rsp]
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_1::
+ pxor xmm7,xmm15
+ lea r12,QWORD PTR[16+r12]
+ movdqa XMMWORD PTR[32+rbp],xmm7
+ lea rcx,QWORD PTR[32+rbp]
+ lea rdx,QWORD PTR[32+rbp]
+ lea r8,QWORD PTR[r15]
+ call asm_AES_decrypt
+ pxor xmm15,XMMWORD PTR[32+rbp]
+
+
+
+
+
+ movdqu XMMWORD PTR[r13],xmm15
+ lea r13,QWORD PTR[16+r13]
+
+ movdqa xmm6,XMMWORD PTR[16+rsp]
+
+$L$xts_dec_done::
+ and ebx,15
+ jz $L$xts_dec_ret
+
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic]
+ pcmpgtd xmm14,xmm6
+ pshufd xmm13,xmm14,013h
+ movdqa xmm5,xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ movdqu xmm15,XMMWORD PTR[r12]
+ pxor xmm6,xmm13
+
+ lea rcx,QWORD PTR[32+rbp]
+ pxor xmm15,xmm6
+ lea rdx,QWORD PTR[32+rbp]
+ movdqa XMMWORD PTR[32+rbp],xmm15
+ lea r8,QWORD PTR[r15]
+ call asm_AES_decrypt
+ pxor xmm6,XMMWORD PTR[32+rbp]
+ mov rdx,r13
+ movdqu XMMWORD PTR[r13],xmm6
+
+$L$xts_dec_steal::
+ movzx eax,BYTE PTR[16+r12]
+ movzx ecx,BYTE PTR[rdx]
+ lea r12,QWORD PTR[1+r12]
+ mov BYTE PTR[rdx],al
+ mov BYTE PTR[16+rdx],cl
+ lea rdx,QWORD PTR[1+rdx]
+ sub ebx,1
+ jnz $L$xts_dec_steal
+
+ movdqu xmm15,XMMWORD PTR[r13]
+ lea rcx,QWORD PTR[32+rbp]
+ pxor xmm15,xmm5
+ lea rdx,QWORD PTR[32+rbp]
+ movdqa XMMWORD PTR[32+rbp],xmm15
+ lea r8,QWORD PTR[r15]
+ call asm_AES_decrypt
+ pxor xmm5,XMMWORD PTR[32+rbp]
+ movdqu XMMWORD PTR[r13],xmm5
+
+$L$xts_dec_ret::
+ lea rax,QWORD PTR[rsp]
+ pxor xmm0,xmm0
+$L$xts_dec_bzero::
+ movdqa XMMWORD PTR[rax],xmm0
+ movdqa XMMWORD PTR[16+rax],xmm0
+ lea rax,QWORD PTR[32+rax]
+ cmp rbp,rax
+ ja $L$xts_dec_bzero
+
+ lea rsp,QWORD PTR[rbp]
+ movaps xmm6,XMMWORD PTR[64+rbp]
+ movaps xmm7,XMMWORD PTR[80+rbp]
+ movaps xmm8,XMMWORD PTR[96+rbp]
+ movaps xmm9,XMMWORD PTR[112+rbp]
+ movaps xmm10,XMMWORD PTR[128+rbp]
+ movaps xmm11,XMMWORD PTR[144+rbp]
+ movaps xmm12,XMMWORD PTR[160+rbp]
+ movaps xmm13,XMMWORD PTR[176+rbp]
+ movaps xmm14,XMMWORD PTR[192+rbp]
+ movaps xmm15,XMMWORD PTR[208+rbp]
+ lea rsp,QWORD PTR[160+rbp]
+ mov r15,QWORD PTR[72+rsp]
+ mov r14,QWORD PTR[80+rsp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r12,QWORD PTR[96+rsp]
+ mov rbx,QWORD PTR[104+rsp]
+ mov rax,QWORD PTR[112+rsp]
+ lea rsp,QWORD PTR[120+rsp]
+ mov rbp,rax
+$L$xts_dec_epilogue::
+ DB 0F3h,0C3h ;repret
+bsaes_xts_decrypt ENDP
+
+ALIGN 64
+_bsaes_const::
+$L$M0ISR::
+ DQ 00a0e0206070b0f03h,00004080c0d010509h
+$L$ISRM0::
+ DQ 001040b0e0205080fh,00306090c00070a0dh
+$L$ISR::
+ DQ 00504070602010003h,00f0e0d0c080b0a09h
+$L$BS0::
+ DQ 05555555555555555h,05555555555555555h
+$L$BS1::
+ DQ 03333333333333333h,03333333333333333h
+$L$BS2::
+ DQ 00f0f0f0f0f0f0f0fh,00f0f0f0f0f0f0f0fh
+$L$SR::
+ DQ 00504070600030201h,00f0e0d0c0a09080bh
+$L$SRM0::
+ DQ 00304090e00050a0fh,001060b0c0207080dh
+$L$M0SR::
+ DQ 00a0e02060f03070bh,00004080c05090d01h
+$L$SWPUP::
+ DQ 00706050403020100h,00c0d0e0f0b0a0908h
+$L$SWPUPM0SR::
+ DQ 00a0d02060c03070bh,00004080f05090e01h
+$L$ADD1::
+ DQ 00000000000000000h,00000000100000000h
+$L$ADD2::
+ DQ 00000000000000000h,00000000200000000h
+$L$ADD3::
+ DQ 00000000000000000h,00000000300000000h
+$L$ADD4::
+ DQ 00000000000000000h,00000000400000000h
+$L$ADD5::
+ DQ 00000000000000000h,00000000500000000h
+$L$ADD6::
+ DQ 00000000000000000h,00000000600000000h
+$L$ADD7::
+ DQ 00000000000000000h,00000000700000000h
+$L$ADD8::
+ DQ 00000000000000000h,00000000800000000h
+$L$xts_magic::
+ DD 087h,0,1,0
+$L$masks::
+ DQ 00101010101010101h,00101010101010101h
+ DQ 00202020202020202h,00202020202020202h
+ DQ 00404040404040404h,00404040404040404h
+ DQ 00808080808080808h,00808080808080808h
+$L$M0::
+ DQ 002060a0e03070b0fh,00004080c0105090dh
+$L$63::
+ DQ 06363636363636363h,06363636363636363h
+DB 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102
+DB 111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44
+DB 32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44
+DB 32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32
+DB 65,110,100,121,32,80,111,108,121,97,107,111,118,0
+ALIGN 64
+
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$in_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_prologue
+
+ mov rax,QWORD PTR[160+r8]
+
+ lea rsi,QWORD PTR[64+rax]
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,20
+ DD 0a548f3fch
+ lea rax,QWORD PTR[160+rax]
+
+ mov rbp,QWORD PTR[112+rax]
+ mov rbx,QWORD PTR[104+rax]
+ mov r12,QWORD PTR[96+rax]
+ mov r13,QWORD PTR[88+rax]
+ mov r14,QWORD PTR[80+rax]
+ mov r15,QWORD PTR[72+rax]
+ lea rax,QWORD PTR[120+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+$L$in_prologue::
+ mov QWORD PTR[152+r8],rax
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$cbc_dec_prologue
+ DD imagerel $L$cbc_dec_epilogue
+ DD imagerel $L$cbc_dec_info
+
+ DD imagerel $L$ctr_enc_prologue
+ DD imagerel $L$ctr_enc_epilogue
+ DD imagerel $L$ctr_enc_info
+
+ DD imagerel $L$xts_enc_prologue
+ DD imagerel $L$xts_enc_epilogue
+ DD imagerel $L$xts_enc_info
+
+ DD imagerel $L$xts_dec_prologue
+ DD imagerel $L$xts_dec_epilogue
+ DD imagerel $L$xts_dec_info
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$cbc_dec_info::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$cbc_dec_body,imagerel $L$cbc_dec_epilogue
+$L$ctr_enc_info::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$ctr_enc_body,imagerel $L$ctr_enc_epilogue
+$L$xts_enc_info::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue
+$L$xts_dec_info::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue
+
+.xdata ENDS
+END
diff --git a/win-x86_64/crypto/aes/vpaes-x86_64.asm b/win-x86_64/crypto/aes/vpaes-x86_64.asm
new file mode 100644
index 0000000..292f64d
--- /dev/null
+++ b/win-x86_64/crypto/aes/vpaes-x86_64.asm
@@ -0,0 +1,1143 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_encrypt_core PROC PRIVATE
+ mov r9,rdx
+ mov r11,16
+ mov eax,DWORD PTR[240+rdx]
+ movdqa xmm1,xmm9
+ movdqa xmm2,XMMWORD PTR[$L$k_ipt]
+ pandn xmm1,xmm0
+ movdqu xmm5,XMMWORD PTR[r9]
+ psrld xmm1,4
+ pand xmm0,xmm9
+DB 102,15,56,0,208
+ movdqa xmm0,XMMWORD PTR[(($L$k_ipt+16))]
+DB 102,15,56,0,193
+ pxor xmm2,xmm5
+ add r9,16
+ pxor xmm0,xmm2
+ lea r10,QWORD PTR[$L$k_mc_backward]
+ jmp $L$enc_entry
+
+ALIGN 16
+$L$enc_loop::
+
+ movdqa xmm4,xmm13
+ movdqa xmm0,xmm12
+DB 102,15,56,0,226
+DB 102,15,56,0,195
+ pxor xmm4,xmm5
+ movdqa xmm5,xmm15
+ pxor xmm0,xmm4
+ movdqa xmm1,XMMWORD PTR[((-64))+r10*1+r11]
+DB 102,15,56,0,234
+ movdqa xmm4,XMMWORD PTR[r10*1+r11]
+ movdqa xmm2,xmm14
+DB 102,15,56,0,211
+ movdqa xmm3,xmm0
+ pxor xmm2,xmm5
+DB 102,15,56,0,193
+ add r9,16
+ pxor xmm0,xmm2
+DB 102,15,56,0,220
+ add r11,16
+ pxor xmm3,xmm0
+DB 102,15,56,0,193
+ and r11,030h
+ sub rax,1
+ pxor xmm0,xmm3
+
+$L$enc_entry::
+
+ movdqa xmm1,xmm9
+ movdqa xmm5,xmm11
+ pandn xmm1,xmm0
+ psrld xmm1,4
+ pand xmm0,xmm9
+DB 102,15,56,0,232
+ movdqa xmm3,xmm10
+ pxor xmm0,xmm1
+DB 102,15,56,0,217
+ movdqa xmm4,xmm10
+ pxor xmm3,xmm5
+DB 102,15,56,0,224
+ movdqa xmm2,xmm10
+ pxor xmm4,xmm5
+DB 102,15,56,0,211
+ movdqa xmm3,xmm10
+ pxor xmm2,xmm0
+DB 102,15,56,0,220
+ movdqu xmm5,XMMWORD PTR[r9]
+ pxor xmm3,xmm1
+ jnz $L$enc_loop
+
+
+ movdqa xmm4,XMMWORD PTR[((-96))+r10]
+ movdqa xmm0,XMMWORD PTR[((-80))+r10]
+DB 102,15,56,0,226
+ pxor xmm4,xmm5
+DB 102,15,56,0,195
+ movdqa xmm1,XMMWORD PTR[64+r10*1+r11]
+ pxor xmm0,xmm4
+DB 102,15,56,0,193
+ DB 0F3h,0C3h ;repret
+_vpaes_encrypt_core ENDP
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_decrypt_core PROC PRIVATE
+ mov r9,rdx
+ mov eax,DWORD PTR[240+rdx]
+ movdqa xmm1,xmm9
+ movdqa xmm2,XMMWORD PTR[$L$k_dipt]
+ pandn xmm1,xmm0
+ mov r11,rax
+ psrld xmm1,4
+ movdqu xmm5,XMMWORD PTR[r9]
+ shl r11,4
+ pand xmm0,xmm9
+DB 102,15,56,0,208
+ movdqa xmm0,XMMWORD PTR[(($L$k_dipt+16))]
+ xor r11,030h
+ lea r10,QWORD PTR[$L$k_dsbd]
+DB 102,15,56,0,193
+ and r11,030h
+ pxor xmm2,xmm5
+ movdqa xmm5,XMMWORD PTR[(($L$k_mc_forward+48))]
+ pxor xmm0,xmm2
+ add r9,16
+ add r11,r10
+ jmp $L$dec_entry
+
+ALIGN 16
+$L$dec_loop::
+
+
+
+ movdqa xmm4,XMMWORD PTR[((-32))+r10]
+ movdqa xmm1,XMMWORD PTR[((-16))+r10]
+DB 102,15,56,0,226
+DB 102,15,56,0,203
+ pxor xmm0,xmm4
+ movdqa xmm4,XMMWORD PTR[r10]
+ pxor xmm0,xmm1
+ movdqa xmm1,XMMWORD PTR[16+r10]
+
+DB 102,15,56,0,226
+DB 102,15,56,0,197
+DB 102,15,56,0,203
+ pxor xmm0,xmm4
+ movdqa xmm4,XMMWORD PTR[32+r10]
+ pxor xmm0,xmm1
+ movdqa xmm1,XMMWORD PTR[48+r10]
+
+DB 102,15,56,0,226
+DB 102,15,56,0,197
+DB 102,15,56,0,203
+ pxor xmm0,xmm4
+ movdqa xmm4,XMMWORD PTR[64+r10]
+ pxor xmm0,xmm1
+ movdqa xmm1,XMMWORD PTR[80+r10]
+
+DB 102,15,56,0,226
+DB 102,15,56,0,197
+DB 102,15,56,0,203
+ pxor xmm0,xmm4
+ add r9,16
+DB 102,15,58,15,237,12
+ pxor xmm0,xmm1
+ sub rax,1
+
+$L$dec_entry::
+
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm0
+ movdqa xmm2,xmm11
+ psrld xmm1,4
+ pand xmm0,xmm9
+DB 102,15,56,0,208
+ movdqa xmm3,xmm10
+ pxor xmm0,xmm1
+DB 102,15,56,0,217
+ movdqa xmm4,xmm10
+ pxor xmm3,xmm2
+DB 102,15,56,0,224
+ pxor xmm4,xmm2
+ movdqa xmm2,xmm10
+DB 102,15,56,0,211
+ movdqa xmm3,xmm10
+ pxor xmm2,xmm0
+DB 102,15,56,0,220
+ movdqu xmm0,XMMWORD PTR[r9]
+ pxor xmm3,xmm1
+ jnz $L$dec_loop
+
+
+ movdqa xmm4,XMMWORD PTR[96+r10]
+DB 102,15,56,0,226
+ pxor xmm4,xmm0
+ movdqa xmm0,XMMWORD PTR[112+r10]
+ movdqa xmm2,XMMWORD PTR[((-352))+r11]
+DB 102,15,56,0,195
+ pxor xmm0,xmm4
+DB 102,15,56,0,194
+ DB 0F3h,0C3h ;repret
+_vpaes_decrypt_core ENDP
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_schedule_core PROC PRIVATE
+
+
+
+
+
+ call _vpaes_preheat
+ movdqa xmm8,XMMWORD PTR[$L$k_rcon]
+ movdqu xmm0,XMMWORD PTR[rdi]
+
+
+ movdqa xmm3,xmm0
+ lea r11,QWORD PTR[$L$k_ipt]
+ call _vpaes_schedule_transform
+ movdqa xmm7,xmm0
+
+ lea r10,QWORD PTR[$L$k_sr]
+ test rcx,rcx
+ jnz $L$schedule_am_decrypting
+
+
+ movdqu XMMWORD PTR[rdx],xmm0
+ jmp $L$schedule_go
+
+$L$schedule_am_decrypting::
+
+ movdqa xmm1,XMMWORD PTR[r10*1+r8]
+DB 102,15,56,0,217
+ movdqu XMMWORD PTR[rdx],xmm3
+ xor r8,030h
+
+$L$schedule_go::
+ cmp esi,192
+ ja $L$schedule_256
+ je $L$schedule_192
+
+
+
+
+
+
+
+
+
+
+$L$schedule_128::
+ mov esi,10
+
+$L$oop_schedule_128::
+ call _vpaes_schedule_round
+ dec rsi
+ jz $L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+ jmp $L$oop_schedule_128
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+$L$schedule_192::
+ movdqu xmm0,XMMWORD PTR[8+rdi]
+ call _vpaes_schedule_transform
+ movdqa xmm6,xmm0
+ pxor xmm4,xmm4
+ movhlps xmm6,xmm4
+ mov esi,4
+
+$L$oop_schedule_192::
+ call _vpaes_schedule_round
+DB 102,15,58,15,198,8
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_round
+ dec rsi
+ jz $L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ jmp $L$oop_schedule_192
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+$L$schedule_256::
+ movdqu xmm0,XMMWORD PTR[16+rdi]
+ call _vpaes_schedule_transform
+ mov esi,7
+
+$L$oop_schedule_256::
+ call _vpaes_schedule_mangle
+ movdqa xmm6,xmm0
+
+
+ call _vpaes_schedule_round
+ dec rsi
+ jz $L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+
+
+ pshufd xmm0,xmm0,0FFh
+ movdqa xmm5,xmm7
+ movdqa xmm7,xmm6
+ call _vpaes_schedule_low_round
+ movdqa xmm7,xmm5
+
+ jmp $L$oop_schedule_256
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+$L$schedule_mangle_last::
+
+ lea r11,QWORD PTR[$L$k_deskew]
+ test rcx,rcx
+ jnz $L$schedule_mangle_last_dec
+
+
+ movdqa xmm1,XMMWORD PTR[r10*1+r8]
+DB 102,15,56,0,193
+ lea r11,QWORD PTR[$L$k_opt]
+ add rdx,32
+
+$L$schedule_mangle_last_dec::
+ add rdx,-16
+ pxor xmm0,XMMWORD PTR[$L$k_s63]
+ call _vpaes_schedule_transform
+ movdqu XMMWORD PTR[rdx],xmm0
+
+
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ pxor xmm6,xmm6
+ pxor xmm7,xmm7
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_core ENDP
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_schedule_192_smear PROC PRIVATE
+ pshufd xmm1,xmm6,080h
+ pshufd xmm0,xmm7,0FEh
+ pxor xmm6,xmm1
+ pxor xmm1,xmm1
+ pxor xmm6,xmm0
+ movdqa xmm0,xmm6
+ movhlps xmm6,xmm1
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_192_smear ENDP
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_schedule_round PROC PRIVATE
+
+ pxor xmm1,xmm1
+DB 102,65,15,58,15,200,15
+DB 102,69,15,58,15,192,15
+ pxor xmm7,xmm1
+
+
+ pshufd xmm0,xmm0,0FFh
+DB 102,15,58,15,192,1
+
+
+
+
+_vpaes_schedule_low_round::
+
+ movdqa xmm1,xmm7
+ pslldq xmm7,4
+ pxor xmm7,xmm1
+ movdqa xmm1,xmm7
+ pslldq xmm7,8
+ pxor xmm7,xmm1
+ pxor xmm7,XMMWORD PTR[$L$k_s63]
+
+
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm0
+ psrld xmm1,4
+ pand xmm0,xmm9
+ movdqa xmm2,xmm11
+DB 102,15,56,0,208
+ pxor xmm0,xmm1
+ movdqa xmm3,xmm10
+DB 102,15,56,0,217
+ pxor xmm3,xmm2
+ movdqa xmm4,xmm10
+DB 102,15,56,0,224
+ pxor xmm4,xmm2
+ movdqa xmm2,xmm10
+DB 102,15,56,0,211
+ pxor xmm2,xmm0
+ movdqa xmm3,xmm10
+DB 102,15,56,0,220
+ pxor xmm3,xmm1
+ movdqa xmm4,xmm13
+DB 102,15,56,0,226
+ movdqa xmm0,xmm12
+DB 102,15,56,0,195
+ pxor xmm0,xmm4
+
+
+ pxor xmm0,xmm7
+ movdqa xmm7,xmm0
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_round ENDP
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_schedule_transform PROC PRIVATE
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm0
+ psrld xmm1,4
+ pand xmm0,xmm9
+ movdqa xmm2,XMMWORD PTR[r11]
+DB 102,15,56,0,208
+ movdqa xmm0,XMMWORD PTR[16+r11]
+DB 102,15,56,0,193
+ pxor xmm0,xmm2
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_transform ENDP
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_schedule_mangle PROC PRIVATE
+ movdqa xmm4,xmm0
+ movdqa xmm5,XMMWORD PTR[$L$k_mc_forward]
+ test rcx,rcx
+ jnz $L$schedule_mangle_dec
+
+
+ add rdx,16
+ pxor xmm4,XMMWORD PTR[$L$k_s63]
+DB 102,15,56,0,229
+ movdqa xmm3,xmm4
+DB 102,15,56,0,229
+ pxor xmm3,xmm4
+DB 102,15,56,0,229
+ pxor xmm3,xmm4
+
+ jmp $L$schedule_mangle_both
+ALIGN 16
+$L$schedule_mangle_dec::
+
+ lea r11,QWORD PTR[$L$k_dksd]
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm4
+ psrld xmm1,4
+ pand xmm4,xmm9
+
+ movdqa xmm2,XMMWORD PTR[r11]
+DB 102,15,56,0,212
+ movdqa xmm3,XMMWORD PTR[16+r11]
+DB 102,15,56,0,217
+ pxor xmm3,xmm2
+DB 102,15,56,0,221
+
+ movdqa xmm2,XMMWORD PTR[32+r11]
+DB 102,15,56,0,212
+ pxor xmm2,xmm3
+ movdqa xmm3,XMMWORD PTR[48+r11]
+DB 102,15,56,0,217
+ pxor xmm3,xmm2
+DB 102,15,56,0,221
+
+ movdqa xmm2,XMMWORD PTR[64+r11]
+DB 102,15,56,0,212
+ pxor xmm2,xmm3
+ movdqa xmm3,XMMWORD PTR[80+r11]
+DB 102,15,56,0,217
+ pxor xmm3,xmm2
+DB 102,15,56,0,221
+
+ movdqa xmm2,XMMWORD PTR[96+r11]
+DB 102,15,56,0,212
+ pxor xmm2,xmm3
+ movdqa xmm3,XMMWORD PTR[112+r11]
+DB 102,15,56,0,217
+ pxor xmm3,xmm2
+
+ add rdx,-16
+
+$L$schedule_mangle_both::
+ movdqa xmm1,XMMWORD PTR[r10*1+r8]
+DB 102,15,56,0,217
+ add r8,-16
+ and r8,030h
+ movdqu XMMWORD PTR[rdx],xmm3
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_mangle ENDP
+
+
+
+
+PUBLIC vpaes_set_encrypt_key
+
+ALIGN 16
+vpaes_set_encrypt_key PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_vpaes_set_encrypt_key::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ lea rsp,QWORD PTR[((-184))+rsp]
+ movaps XMMWORD PTR[16+rsp],xmm6
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+$L$enc_key_body::
+ mov eax,esi
+ shr eax,5
+ add eax,5
+ mov DWORD PTR[240+rdx],eax
+
+ mov ecx,0
+ mov r8d,030h
+ call _vpaes_schedule_core
+ movaps xmm6,XMMWORD PTR[16+rsp]
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+$L$enc_key_epilogue::
+ xor eax,eax
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_vpaes_set_encrypt_key::
+vpaes_set_encrypt_key ENDP
+
+PUBLIC vpaes_set_decrypt_key
+
+ALIGN 16
+vpaes_set_decrypt_key PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_vpaes_set_decrypt_key::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ lea rsp,QWORD PTR[((-184))+rsp]
+ movaps XMMWORD PTR[16+rsp],xmm6
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+$L$dec_key_body::
+ mov eax,esi
+ shr eax,5
+ add eax,5
+ mov DWORD PTR[240+rdx],eax
+ shl eax,4
+ lea rdx,QWORD PTR[16+rax*1+rdx]
+
+ mov ecx,1
+ mov r8d,esi
+ shr r8d,1
+ and r8d,32
+ xor r8d,32
+ call _vpaes_schedule_core
+ movaps xmm6,XMMWORD PTR[16+rsp]
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+$L$dec_key_epilogue::
+ xor eax,eax
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_vpaes_set_decrypt_key::
+vpaes_set_decrypt_key ENDP
+
+PUBLIC vpaes_encrypt
+
+ALIGN 16
+vpaes_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_vpaes_encrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ lea rsp,QWORD PTR[((-184))+rsp]
+ movaps XMMWORD PTR[16+rsp],xmm6
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+$L$enc_body::
+ movdqu xmm0,XMMWORD PTR[rdi]
+ call _vpaes_preheat
+ call _vpaes_encrypt_core
+ movdqu XMMWORD PTR[rsi],xmm0
+ movaps xmm6,XMMWORD PTR[16+rsp]
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+$L$enc_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_vpaes_encrypt::
+vpaes_encrypt ENDP
+
+PUBLIC vpaes_decrypt
+
+ALIGN 16
+vpaes_decrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_vpaes_decrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ lea rsp,QWORD PTR[((-184))+rsp]
+ movaps XMMWORD PTR[16+rsp],xmm6
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+$L$dec_body::
+ movdqu xmm0,XMMWORD PTR[rdi]
+ call _vpaes_preheat
+ call _vpaes_decrypt_core
+ movdqu XMMWORD PTR[rsi],xmm0
+ movaps xmm6,XMMWORD PTR[16+rsp]
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+$L$dec_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_vpaes_decrypt::
+vpaes_decrypt ENDP
+PUBLIC vpaes_cbc_encrypt
+
+ALIGN 16
+vpaes_cbc_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_vpaes_cbc_encrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ xchg rdx,rcx
+ sub rcx,16
+ jc $L$cbc_abort
+ lea rsp,QWORD PTR[((-184))+rsp]
+ movaps XMMWORD PTR[16+rsp],xmm6
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+$L$cbc_body::
+ movdqu xmm6,XMMWORD PTR[r8]
+ sub rsi,rdi
+ call _vpaes_preheat
+ cmp r9d,0
+ je $L$cbc_dec_loop
+ jmp $L$cbc_enc_loop
+ALIGN 16
+$L$cbc_enc_loop::
+ movdqu xmm0,XMMWORD PTR[rdi]
+ pxor xmm0,xmm6
+ call _vpaes_encrypt_core
+ movdqa xmm6,xmm0
+ movdqu XMMWORD PTR[rdi*1+rsi],xmm0
+ lea rdi,QWORD PTR[16+rdi]
+ sub rcx,16
+ jnc $L$cbc_enc_loop
+ jmp $L$cbc_done
+ALIGN 16
+$L$cbc_dec_loop::
+ movdqu xmm0,XMMWORD PTR[rdi]
+ movdqa xmm7,xmm0
+ call _vpaes_decrypt_core
+ pxor xmm0,xmm6
+ movdqa xmm6,xmm7
+ movdqu XMMWORD PTR[rdi*1+rsi],xmm0
+ lea rdi,QWORD PTR[16+rdi]
+ sub rcx,16
+ jnc $L$cbc_dec_loop
+$L$cbc_done::
+ movdqu XMMWORD PTR[r8],xmm6
+ movaps xmm6,XMMWORD PTR[16+rsp]
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+$L$cbc_epilogue::
+$L$cbc_abort::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_vpaes_cbc_encrypt::
+vpaes_cbc_encrypt ENDP
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_preheat PROC PRIVATE
+ lea r10,QWORD PTR[$L$k_s0F]
+ movdqa xmm10,XMMWORD PTR[((-32))+r10]
+ movdqa xmm11,XMMWORD PTR[((-16))+r10]
+ movdqa xmm9,XMMWORD PTR[r10]
+ movdqa xmm13,XMMWORD PTR[48+r10]
+ movdqa xmm12,XMMWORD PTR[64+r10]
+ movdqa xmm15,XMMWORD PTR[80+r10]
+ movdqa xmm14,XMMWORD PTR[96+r10]
+ DB 0F3h,0C3h ;repret
+_vpaes_preheat ENDP
+
+
+
+
+
+
+ALIGN 64
+_vpaes_consts::
+$L$k_inv::
+ DQ 00E05060F0D080180h,0040703090A0B0C02h
+ DQ 001040A060F0B0780h,0030D0E0C02050809h
+
+$L$k_s0F::
+ DQ 00F0F0F0F0F0F0F0Fh,00F0F0F0F0F0F0F0Fh
+
+$L$k_ipt::
+ DQ 0C2B2E8985A2A7000h,0CABAE09052227808h
+ DQ 04C01307D317C4D00h,0CD80B1FCB0FDCC81h
+
+$L$k_sb1::
+ DQ 0B19BE18FCB503E00h,0A5DF7A6E142AF544h
+ DQ 03618D415FAE22300h,03BF7CCC10D2ED9EFh
+$L$k_sb2::
+ DQ 0E27A93C60B712400h,05EB7E955BC982FCDh
+ DQ 069EB88400AE12900h,0C2A163C8AB82234Ah
+$L$k_sbo::
+ DQ 0D0D26D176FBDC700h,015AABF7AC502A878h
+ DQ 0CFE474A55FBB6A00h,08E1E90D1412B35FAh
+
+$L$k_mc_forward::
+ DQ 00407060500030201h,00C0F0E0D080B0A09h
+ DQ 0080B0A0904070605h,0000302010C0F0E0Dh
+ DQ 00C0F0E0D080B0A09h,00407060500030201h
+ DQ 0000302010C0F0E0Dh,0080B0A0904070605h
+
+$L$k_mc_backward::
+ DQ 00605040702010003h,00E0D0C0F0A09080Bh
+ DQ 0020100030E0D0C0Fh,00A09080B06050407h
+ DQ 00E0D0C0F0A09080Bh,00605040702010003h
+ DQ 00A09080B06050407h,0020100030E0D0C0Fh
+
+$L$k_sr::
+ DQ 00706050403020100h,00F0E0D0C0B0A0908h
+ DQ 0030E09040F0A0500h,00B06010C07020D08h
+ DQ 00F060D040B020900h,0070E050C030A0108h
+ DQ 00B0E0104070A0D00h,00306090C0F020508h
+
+$L$k_rcon::
+ DQ 01F8391B9AF9DEEB6h,0702A98084D7C7D81h
+
+$L$k_s63::
+ DQ 05B5B5B5B5B5B5B5Bh,05B5B5B5B5B5B5B5Bh
+
+$L$k_opt::
+ DQ 0FF9F4929D6B66000h,0F7974121DEBE6808h
+ DQ 001EDBD5150BCEC00h,0E10D5DB1B05C0CE0h
+
+$L$k_deskew::
+ DQ 007E4A34047A4E300h,01DFEB95A5DBEF91Ah
+ DQ 05F36B5DC83EA6900h,02841C2ABF49D1E77h
+
+
+
+
+
+$L$k_dksd::
+ DQ 0FEB91A5DA3E44700h,00740E3A45A1DBEF9h
+ DQ 041C277F4B5368300h,05FDC69EAAB289D1Eh
+$L$k_dksb::
+ DQ 09A4FCA1F8550D500h,003D653861CC94C99h
+ DQ 0115BEDA7B6FC4A00h,0D993256F7E3482C8h
+$L$k_dkse::
+ DQ 0D5031CCA1FC9D600h,053859A4C994F5086h
+ DQ 0A23196054FDC7BE8h,0CD5EF96A20B31487h
+$L$k_dks9::
+ DQ 0B6116FC87ED9A700h,04AED933482255BFCh
+ DQ 04576516227143300h,08BB89FACE9DAFDCEh
+
+
+
+
+
+$L$k_dipt::
+ DQ 00F505B040B545F00h,0154A411E114E451Ah
+ DQ 086E383E660056500h,012771772F491F194h
+
+$L$k_dsb9::
+ DQ 0851C03539A86D600h,0CAD51F504F994CC9h
+ DQ 0C03B1789ECD74900h,0725E2C9EB2FBA565h
+$L$k_dsbd::
+ DQ 07D57CCDFE6B1A200h,0F56E9B13882A4439h
+ DQ 03CE2FAF724C6CB00h,02931180D15DEEFD3h
+$L$k_dsbb::
+ DQ 0D022649296B44200h,0602646F6B0F2D404h
+ DQ 0C19498A6CD596700h,0F3FF0C3E3255AA6Bh
+$L$k_dsbe::
+ DQ 046F2929626D4D000h,02242600464B4F6B0h
+ DQ 00C55A6CDFFAAC100h,09467F36B98593E32h
+$L$k_dsbo::
+ DQ 01387EA537EF94000h,0C7AA6DB9D4943E2Dh
+ DQ 012D7560F93441D00h,0CA4B8159D8C58E9Ch
+DB 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
+DB 111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54
+DB 52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97
+DB 109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32
+DB 85,110,105,118,101,114,115,105,116,121,41,0
+ALIGN 64
+
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$in_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_prologue
+
+ lea rsi,QWORD PTR[16+rax]
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,20
+ DD 0a548f3fch
+ lea rax,QWORD PTR[184+rax]
+
+$L$in_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_vpaes_set_encrypt_key
+ DD imagerel $L$SEH_end_vpaes_set_encrypt_key
+ DD imagerel $L$SEH_info_vpaes_set_encrypt_key
+
+ DD imagerel $L$SEH_begin_vpaes_set_decrypt_key
+ DD imagerel $L$SEH_end_vpaes_set_decrypt_key
+ DD imagerel $L$SEH_info_vpaes_set_decrypt_key
+
+ DD imagerel $L$SEH_begin_vpaes_encrypt
+ DD imagerel $L$SEH_end_vpaes_encrypt
+ DD imagerel $L$SEH_info_vpaes_encrypt
+
+ DD imagerel $L$SEH_begin_vpaes_decrypt
+ DD imagerel $L$SEH_end_vpaes_decrypt
+ DD imagerel $L$SEH_info_vpaes_decrypt
+
+ DD imagerel $L$SEH_begin_vpaes_cbc_encrypt
+ DD imagerel $L$SEH_end_vpaes_cbc_encrypt
+ DD imagerel $L$SEH_info_vpaes_cbc_encrypt
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_vpaes_set_encrypt_key::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$enc_key_body,imagerel $L$enc_key_epilogue
+$L$SEH_info_vpaes_set_decrypt_key::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$dec_key_body,imagerel $L$dec_key_epilogue
+$L$SEH_info_vpaes_encrypt::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$enc_body,imagerel $L$enc_epilogue
+$L$SEH_info_vpaes_decrypt::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$dec_body,imagerel $L$dec_epilogue
+$L$SEH_info_vpaes_cbc_encrypt::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$cbc_body,imagerel $L$cbc_epilogue
+
+.xdata ENDS
+END
diff --git a/win-x86_64/crypto/bn/modexp512-x86_64.asm b/win-x86_64/crypto/bn/modexp512-x86_64.asm
new file mode 100644
index 0000000..d3e4a61
--- /dev/null
+++ b/win-x86_64/crypto/bn/modexp512-x86_64.asm
@@ -0,0 +1,1887 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+
+
+ALIGN 16
+MULADD_128x512 PROC PRIVATE
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ mov QWORD PTR[rcx],r8
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov r8,rdx
+ mov rbp,QWORD PTR[8+rdi]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ mov QWORD PTR[8+rcx],r9
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov r9,rdx
+ DB 0F3h,0C3h ;repret
+MULADD_128x512 ENDP
+
+ALIGN 16
+mont_reduce PROC PRIVATE
+ lea rdi,QWORD PTR[192+rsp]
+ mov rsi,QWORD PTR[32+rsp]
+ add rsi,576
+ lea rcx,QWORD PTR[520+rsp]
+
+ mov rbp,QWORD PTR[96+rcx]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ mov r8,QWORD PTR[rcx]
+ add r8,rax
+ adc rdx,0
+ mov QWORD PTR[rdi],r8
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ mov r9,QWORD PTR[8+rcx]
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ mov r10,QWORD PTR[16+rcx]
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ mov r11,QWORD PTR[24+rcx]
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ mov r12,QWORD PTR[32+rcx]
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ mov r13,QWORD PTR[40+rcx]
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ mov r14,QWORD PTR[48+rcx]
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ mov r15,QWORD PTR[56+rcx]
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov r8,rdx
+ mov rbp,QWORD PTR[104+rcx]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ mov QWORD PTR[8+rdi],r9
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov r9,rdx
+ mov rbp,QWORD PTR[112+rcx]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ mov QWORD PTR[16+rdi],r10
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov r10,rdx
+ mov rbp,QWORD PTR[120+rcx]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ mov QWORD PTR[24+rdi],r11
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov r11,rdx
+ xor rax,rax
+
+ add r8,QWORD PTR[64+rcx]
+ adc r9,QWORD PTR[72+rcx]
+ adc r10,QWORD PTR[80+rcx]
+ adc r11,QWORD PTR[88+rcx]
+ adc rax,0
+
+
+
+
+ mov QWORD PTR[64+rdi],r8
+ mov QWORD PTR[72+rdi],r9
+ mov rbp,r10
+ mov QWORD PTR[88+rdi],r11
+
+ mov QWORD PTR[384+rsp],rax
+
+ mov r8,QWORD PTR[rdi]
+ mov r9,QWORD PTR[8+rdi]
+ mov r10,QWORD PTR[16+rdi]
+ mov r11,QWORD PTR[24+rdi]
+
+
+
+
+
+
+
+
+ add rdi,8*10
+
+ add rsi,64
+ lea rcx,QWORD PTR[296+rsp]
+
+ call MULADD_128x512
+
+ mov rax,QWORD PTR[384+rsp]
+
+
+ add r8,QWORD PTR[((-16))+rdi]
+ adc r9,QWORD PTR[((-8))+rdi]
+ mov QWORD PTR[64+rcx],r8
+ mov QWORD PTR[72+rcx],r9
+
+ adc rax,rax
+ mov QWORD PTR[384+rsp],rax
+
+ lea rdi,QWORD PTR[192+rsp]
+ add rsi,64
+
+
+
+
+
+ mov r8,QWORD PTR[rsi]
+ mov rbx,QWORD PTR[8+rsi]
+
+ mov rax,QWORD PTR[rcx]
+ mul r8
+ mov rbp,rax
+ mov r9,rdx
+
+ mov rax,QWORD PTR[8+rcx]
+ mul r8
+ add r9,rax
+
+ mov rax,QWORD PTR[rcx]
+ mul rbx
+ add r9,rax
+
+ mov QWORD PTR[8+rdi],r9
+
+
+ sub rsi,192
+
+ mov r8,QWORD PTR[rcx]
+ mov r9,QWORD PTR[8+rcx]
+
+ call MULADD_128x512
+
+
+
+
+ mov rax,QWORD PTR[rsi]
+ mov rbx,QWORD PTR[8+rsi]
+ mov rdi,QWORD PTR[16+rsi]
+ mov rdx,QWORD PTR[24+rsi]
+
+
+ mov rbp,QWORD PTR[384+rsp]
+
+ add r8,QWORD PTR[64+rcx]
+ adc r9,QWORD PTR[72+rcx]
+
+
+ adc rbp,rbp
+
+
+
+ shl rbp,3
+ mov rcx,QWORD PTR[32+rsp]
+ add rbp,rcx
+
+
+ xor rsi,rsi
+
+ add r10,QWORD PTR[rbp]
+ adc r11,QWORD PTR[64+rbp]
+ adc r12,QWORD PTR[128+rbp]
+ adc r13,QWORD PTR[192+rbp]
+ adc r14,QWORD PTR[256+rbp]
+ adc r15,QWORD PTR[320+rbp]
+ adc r8,QWORD PTR[384+rbp]
+ adc r9,QWORD PTR[448+rbp]
+
+
+
+ sbb rsi,0
+
+
+ and rax,rsi
+ and rbx,rsi
+ and rdi,rsi
+ and rdx,rsi
+
+ mov rbp,1
+ sub r10,rax
+ sbb r11,rbx
+ sbb r12,rdi
+ sbb r13,rdx
+
+
+
+
+ sbb rbp,0
+
+
+
+ add rcx,512
+ mov rax,QWORD PTR[32+rcx]
+ mov rbx,QWORD PTR[40+rcx]
+ mov rdi,QWORD PTR[48+rcx]
+ mov rdx,QWORD PTR[56+rcx]
+
+
+
+ and rax,rsi
+ and rbx,rsi
+ and rdi,rsi
+ and rdx,rsi
+
+
+
+ sub rbp,1
+
+ sbb r14,rax
+ sbb r15,rbx
+ sbb r8,rdi
+ sbb r9,rdx
+
+
+
+ mov rsi,QWORD PTR[144+rsp]
+ mov QWORD PTR[rsi],r10
+ mov QWORD PTR[8+rsi],r11
+ mov QWORD PTR[16+rsi],r12
+ mov QWORD PTR[24+rsi],r13
+ mov QWORD PTR[32+rsi],r14
+ mov QWORD PTR[40+rsi],r15
+ mov QWORD PTR[48+rsi],r8
+ mov QWORD PTR[56+rsi],r9
+
+ DB 0F3h,0C3h ;repret
+mont_reduce ENDP
+
+ALIGN 16
+mont_mul_a3b PROC PRIVATE
+
+
+
+
+ mov rbp,QWORD PTR[rdi]
+
+ mov rax,r10
+ mul rbp
+ mov QWORD PTR[520+rsp],rax
+ mov r10,rdx
+ mov rax,r11
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ mov r11,rdx
+ mov rax,r12
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ mov r12,rdx
+ mov rax,r13
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ mov r13,rdx
+ mov rax,r14
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ mov r14,rdx
+ mov rax,r15
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ mov r15,rdx
+ mov rax,r8
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ mov r8,rdx
+ mov rax,r9
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ mov r9,rdx
+ mov rbp,QWORD PTR[8+rdi]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ mov QWORD PTR[528+rsp],r10
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov r10,rdx
+ mov rbp,QWORD PTR[16+rdi]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ mov QWORD PTR[536+rsp],r11
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov r11,rdx
+ mov rbp,QWORD PTR[24+rdi]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ mov QWORD PTR[544+rsp],r12
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov r12,rdx
+ mov rbp,QWORD PTR[32+rdi]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ mov QWORD PTR[552+rsp],r13
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov r13,rdx
+ mov rbp,QWORD PTR[40+rdi]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ mov QWORD PTR[560+rsp],r14
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov r14,rdx
+ mov rbp,QWORD PTR[48+rdi]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ mov QWORD PTR[568+rsp],r15
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov r15,rdx
+ mov rbp,QWORD PTR[56+rdi]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ mov QWORD PTR[576+rsp],r8
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov r8,rdx
+ mov QWORD PTR[584+rsp],r9
+ mov QWORD PTR[592+rsp],r10
+ mov QWORD PTR[600+rsp],r11
+ mov QWORD PTR[608+rsp],r12
+ mov QWORD PTR[616+rsp],r13
+ mov QWORD PTR[624+rsp],r14
+ mov QWORD PTR[632+rsp],r15
+ mov QWORD PTR[640+rsp],r8
+
+
+
+
+
+ jmp mont_reduce
+
+
+mont_mul_a3b ENDP
+
+ALIGN 16
+sqr_reduce PROC PRIVATE
+ mov rcx,QWORD PTR[16+rsp]
+
+
+
+ mov rbx,r10
+
+ mov rax,r11
+ mul rbx
+ mov QWORD PTR[528+rsp],rax
+ mov r10,rdx
+ mov rax,r12
+ mul rbx
+ add r10,rax
+ adc rdx,0
+ mov r11,rdx
+ mov rax,r13
+ mul rbx
+ add r11,rax
+ adc rdx,0
+ mov r12,rdx
+ mov rax,r14
+ mul rbx
+ add r12,rax
+ adc rdx,0
+ mov r13,rdx
+ mov rax,r15
+ mul rbx
+ add r13,rax
+ adc rdx,0
+ mov r14,rdx
+ mov rax,r8
+ mul rbx
+ add r14,rax
+ adc rdx,0
+ mov r15,rdx
+ mov rax,r9
+ mul rbx
+ add r15,rax
+ adc rdx,0
+ mov rsi,rdx
+
+ mov QWORD PTR[536+rsp],r10
+
+
+
+
+
+ mov rbx,QWORD PTR[8+rcx]
+
+ mov rax,QWORD PTR[16+rcx]
+ mul rbx
+ add r11,rax
+ adc rdx,0
+ mov QWORD PTR[544+rsp],r11
+
+ mov r10,rdx
+ mov rax,QWORD PTR[24+rcx]
+ mul rbx
+ add r12,rax
+ adc rdx,0
+ add r12,r10
+ adc rdx,0
+ mov QWORD PTR[552+rsp],r12
+
+ mov r10,rdx
+ mov rax,QWORD PTR[32+rcx]
+ mul rbx
+ add r13,rax
+ adc rdx,0
+ add r13,r10
+ adc rdx,0
+
+ mov r10,rdx
+ mov rax,QWORD PTR[40+rcx]
+ mul rbx
+ add r14,rax
+ adc rdx,0
+ add r14,r10
+ adc rdx,0
+
+ mov r10,rdx
+ mov rax,r8
+ mul rbx
+ add r15,rax
+ adc rdx,0
+ add r15,r10
+ adc rdx,0
+
+ mov r10,rdx
+ mov rax,r9
+ mul rbx
+ add rsi,rax
+ adc rdx,0
+ add rsi,r10
+ adc rdx,0
+
+ mov r11,rdx
+
+
+
+
+ mov rbx,QWORD PTR[16+rcx]
+
+ mov rax,QWORD PTR[24+rcx]
+ mul rbx
+ add r13,rax
+ adc rdx,0
+ mov QWORD PTR[560+rsp],r13
+
+ mov r10,rdx
+ mov rax,QWORD PTR[32+rcx]
+ mul rbx
+ add r14,rax
+ adc rdx,0
+ add r14,r10
+ adc rdx,0
+ mov QWORD PTR[568+rsp],r14
+
+ mov r10,rdx
+ mov rax,QWORD PTR[40+rcx]
+ mul rbx
+ add r15,rax
+ adc rdx,0
+ add r15,r10
+ adc rdx,0
+
+ mov r10,rdx
+ mov rax,r8
+ mul rbx
+ add rsi,rax
+ adc rdx,0
+ add rsi,r10
+ adc rdx,0
+
+ mov r10,rdx
+ mov rax,r9
+ mul rbx
+ add r11,rax
+ adc rdx,0
+ add r11,r10
+ adc rdx,0
+
+ mov r12,rdx
+
+
+
+
+
+ mov rbx,QWORD PTR[24+rcx]
+
+ mov rax,QWORD PTR[32+rcx]
+ mul rbx
+ add r15,rax
+ adc rdx,0
+ mov QWORD PTR[576+rsp],r15
+
+ mov r10,rdx
+ mov rax,QWORD PTR[40+rcx]
+ mul rbx
+ add rsi,rax
+ adc rdx,0
+ add rsi,r10
+ adc rdx,0
+ mov QWORD PTR[584+rsp],rsi
+
+ mov r10,rdx
+ mov rax,r8
+ mul rbx
+ add r11,rax
+ adc rdx,0
+ add r11,r10
+ adc rdx,0
+
+ mov r10,rdx
+ mov rax,r9
+ mul rbx
+ add r12,rax
+ adc rdx,0
+ add r12,r10
+ adc rdx,0
+
+ mov r15,rdx
+
+
+
+
+ mov rbx,QWORD PTR[32+rcx]
+
+ mov rax,QWORD PTR[40+rcx]
+ mul rbx
+ add r11,rax
+ adc rdx,0
+ mov QWORD PTR[592+rsp],r11
+
+ mov r10,rdx
+ mov rax,r8
+ mul rbx
+ add r12,rax
+ adc rdx,0
+ add r12,r10
+ adc rdx,0
+ mov QWORD PTR[600+rsp],r12
+
+ mov r10,rdx
+ mov rax,r9
+ mul rbx
+ add r15,rax
+ adc rdx,0
+ add r15,r10
+ adc rdx,0
+
+ mov r11,rdx
+
+
+
+
+ mov rbx,QWORD PTR[40+rcx]
+
+ mov rax,r8
+ mul rbx
+ add r15,rax
+ adc rdx,0
+ mov QWORD PTR[608+rsp],r15
+
+ mov r10,rdx
+ mov rax,r9
+ mul rbx
+ add r11,rax
+ adc rdx,0
+ add r11,r10
+ adc rdx,0
+ mov QWORD PTR[616+rsp],r11
+
+ mov r12,rdx
+
+
+
+
+ mov rbx,r8
+
+ mov rax,r9
+ mul rbx
+ add r12,rax
+ adc rdx,0
+ mov QWORD PTR[624+rsp],r12
+
+ mov QWORD PTR[632+rsp],rdx
+
+
+ mov r10,QWORD PTR[528+rsp]
+ mov r11,QWORD PTR[536+rsp]
+ mov r12,QWORD PTR[544+rsp]
+ mov r13,QWORD PTR[552+rsp]
+ mov r14,QWORD PTR[560+rsp]
+ mov r15,QWORD PTR[568+rsp]
+
+ mov rax,QWORD PTR[24+rcx]
+ mul rax
+ mov rdi,rax
+ mov r8,rdx
+
+ add r10,r10
+ adc r11,r11
+ adc r12,r12
+ adc r13,r13
+ adc r14,r14
+ adc r15,r15
+ adc r8,0
+
+ mov rax,QWORD PTR[rcx]
+ mul rax
+ mov QWORD PTR[520+rsp],rax
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rcx]
+ mul rax
+
+ add r10,rbx
+ adc r11,rax
+ adc rdx,0
+
+ mov rbx,rdx
+ mov QWORD PTR[528+rsp],r10
+ mov QWORD PTR[536+rsp],r11
+
+ mov rax,QWORD PTR[16+rcx]
+ mul rax
+
+ add r12,rbx
+ adc r13,rax
+ adc rdx,0
+
+ mov rbx,rdx
+
+ mov QWORD PTR[544+rsp],r12
+ mov QWORD PTR[552+rsp],r13
+
+ xor rbp,rbp
+ add r14,rbx
+ adc r15,rdi
+ adc rbp,0
+
+ mov QWORD PTR[560+rsp],r14
+ mov QWORD PTR[568+rsp],r15
+
+
+
+
+ mov r10,QWORD PTR[576+rsp]
+ mov r11,QWORD PTR[584+rsp]
+ mov r12,QWORD PTR[592+rsp]
+ mov r13,QWORD PTR[600+rsp]
+ mov r14,QWORD PTR[608+rsp]
+ mov r15,QWORD PTR[616+rsp]
+ mov rdi,QWORD PTR[624+rsp]
+ mov rsi,QWORD PTR[632+rsp]
+
+ mov rax,r9
+ mul rax
+ mov r9,rax
+ mov rbx,rdx
+
+ add r10,r10
+ adc r11,r11
+ adc r12,r12
+ adc r13,r13
+ adc r14,r14
+ adc r15,r15
+ adc rdi,rdi
+ adc rsi,rsi
+ adc rbx,0
+
+ add r10,rbp
+
+ mov rax,QWORD PTR[32+rcx]
+ mul rax
+
+ add r10,r8
+ adc r11,rax
+ adc rdx,0
+
+ mov rbp,rdx
+
+ mov QWORD PTR[576+rsp],r10
+ mov QWORD PTR[584+rsp],r11
+
+ mov rax,QWORD PTR[40+rcx]
+ mul rax
+
+ add r12,rbp
+ adc r13,rax
+ adc rdx,0
+
+ mov rbp,rdx
+
+ mov QWORD PTR[592+rsp],r12
+ mov QWORD PTR[600+rsp],r13
+
+ mov rax,QWORD PTR[48+rcx]
+ mul rax
+
+ add r14,rbp
+ adc r15,rax
+ adc rdx,0
+
+ mov QWORD PTR[608+rsp],r14
+ mov QWORD PTR[616+rsp],r15
+
+ add rdi,rdx
+ adc rsi,r9
+ adc rbx,0
+
+ mov QWORD PTR[624+rsp],rdi
+ mov QWORD PTR[632+rsp],rsi
+ mov QWORD PTR[640+rsp],rbx
+
+ jmp mont_reduce
+
+
+sqr_reduce ENDP
+PUBLIC mod_exp_512
+
+mod_exp_512 PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_mod_exp_512::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+
+
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+
+
+ mov r8,rsp
+ sub rsp,2688
+ and rsp,-64
+
+
+ mov QWORD PTR[rsp],r8
+ mov QWORD PTR[8+rsp],rdi
+ mov QWORD PTR[16+rsp],rsi
+ mov QWORD PTR[24+rsp],rcx
+$L$body::
+
+
+
+ pxor xmm4,xmm4
+ movdqu xmm0,XMMWORD PTR[rsi]
+ movdqu xmm1,XMMWORD PTR[16+rsi]
+ movdqu xmm2,XMMWORD PTR[32+rsi]
+ movdqu xmm3,XMMWORD PTR[48+rsi]
+ movdqa XMMWORD PTR[512+rsp],xmm4
+ movdqa XMMWORD PTR[528+rsp],xmm4
+ movdqa XMMWORD PTR[608+rsp],xmm4
+ movdqa XMMWORD PTR[624+rsp],xmm4
+ movdqa XMMWORD PTR[544+rsp],xmm0
+ movdqa XMMWORD PTR[560+rsp],xmm1
+ movdqa XMMWORD PTR[576+rsp],xmm2
+ movdqa XMMWORD PTR[592+rsp],xmm3
+
+
+ movdqu xmm0,XMMWORD PTR[rdx]
+ movdqu xmm1,XMMWORD PTR[16+rdx]
+ movdqu xmm2,XMMWORD PTR[32+rdx]
+ movdqu xmm3,XMMWORD PTR[48+rdx]
+
+ lea rbx,QWORD PTR[384+rsp]
+ mov QWORD PTR[136+rsp],rbx
+ call mont_reduce
+
+
+ lea rcx,QWORD PTR[448+rsp]
+ xor rax,rax
+ mov QWORD PTR[rcx],rax
+ mov QWORD PTR[8+rcx],rax
+ mov QWORD PTR[24+rcx],rax
+ mov QWORD PTR[32+rcx],rax
+ mov QWORD PTR[40+rcx],rax
+ mov QWORD PTR[48+rcx],rax
+ mov QWORD PTR[56+rcx],rax
+ mov QWORD PTR[128+rsp],rax
+ mov QWORD PTR[16+rcx],1
+
+ lea rbp,QWORD PTR[640+rsp]
+ mov rsi,rcx
+ mov rdi,rbp
+ mov rax,8
+loop_0::
+ mov rbx,QWORD PTR[rcx]
+ mov WORD PTR[rdi],bx
+ shr rbx,16
+ mov WORD PTR[64+rdi],bx
+ shr rbx,16
+ mov WORD PTR[128+rdi],bx
+ shr rbx,16
+ mov WORD PTR[192+rdi],bx
+ lea rcx,QWORD PTR[8+rcx]
+ lea rdi,QWORD PTR[256+rdi]
+ dec rax
+ jnz loop_0
+ mov rax,31
+ mov QWORD PTR[32+rsp],rax
+ mov QWORD PTR[40+rsp],rbp
+
+ mov QWORD PTR[136+rsp],rsi
+ mov r10,QWORD PTR[rsi]
+ mov r11,QWORD PTR[8+rsi]
+ mov r12,QWORD PTR[16+rsi]
+ mov r13,QWORD PTR[24+rsi]
+ mov r14,QWORD PTR[32+rsi]
+ mov r15,QWORD PTR[40+rsi]
+ mov r8,QWORD PTR[48+rsi]
+ mov r9,QWORD PTR[56+rsi]
+init_loop::
+ lea rdi,QWORD PTR[384+rsp]
+ call mont_mul_a3b
+ lea rsi,QWORD PTR[448+rsp]
+ mov rbp,QWORD PTR[40+rsp]
+ add rbp,2
+ mov QWORD PTR[40+rsp],rbp
+ mov rcx,rsi
+ mov rax,8
+loop_1::
+ mov rbx,QWORD PTR[rcx]
+ mov WORD PTR[rbp],bx
+ shr rbx,16
+ mov WORD PTR[64+rbp],bx
+ shr rbx,16
+ mov WORD PTR[128+rbp],bx
+ shr rbx,16
+ mov WORD PTR[192+rbp],bx
+ lea rcx,QWORD PTR[8+rcx]
+ lea rbp,QWORD PTR[256+rbp]
+ dec rax
+ jnz loop_1
+ mov rax,QWORD PTR[32+rsp]
+ sub rax,1
+ mov QWORD PTR[32+rsp],rax
+ jne init_loop
+
+
+
+ movdqa XMMWORD PTR[64+rsp],xmm0
+ movdqa XMMWORD PTR[80+rsp],xmm1
+ movdqa XMMWORD PTR[96+rsp],xmm2
+ movdqa XMMWORD PTR[112+rsp],xmm3
+
+
+
+
+
+ mov eax,DWORD PTR[126+rsp]
+ mov rdx,rax
+ shr rax,11
+ and edx,007FFh
+ mov DWORD PTR[126+rsp],edx
+ lea rsi,QWORD PTR[640+rax*2+rsp]
+ mov rdx,QWORD PTR[8+rsp]
+ mov rbp,4
+loop_2::
+ movzx rbx,WORD PTR[192+rsi]
+ movzx rax,WORD PTR[448+rsi]
+ shl rbx,16
+ shl rax,16
+ mov bx,WORD PTR[128+rsi]
+ mov ax,WORD PTR[384+rsi]
+ shl rbx,16
+ shl rax,16
+ mov bx,WORD PTR[64+rsi]
+ mov ax,WORD PTR[320+rsi]
+ shl rbx,16
+ shl rax,16
+ mov bx,WORD PTR[rsi]
+ mov ax,WORD PTR[256+rsi]
+ mov QWORD PTR[rdx],rbx
+ mov QWORD PTR[8+rdx],rax
+ lea rsi,QWORD PTR[512+rsi]
+ lea rdx,QWORD PTR[16+rdx]
+ sub rbp,1
+ jnz loop_2
+ mov QWORD PTR[48+rsp],505
+
+ mov rcx,QWORD PTR[8+rsp]
+ mov QWORD PTR[136+rsp],rcx
+ mov r10,QWORD PTR[rcx]
+ mov r11,QWORD PTR[8+rcx]
+ mov r12,QWORD PTR[16+rcx]
+ mov r13,QWORD PTR[24+rcx]
+ mov r14,QWORD PTR[32+rcx]
+ mov r15,QWORD PTR[40+rcx]
+ mov r8,QWORD PTR[48+rcx]
+ mov r9,QWORD PTR[56+rcx]
+ jmp sqr_2
+
+main_loop_a3b::
+ call sqr_reduce
+ call sqr_reduce
+ call sqr_reduce
+sqr_2::
+ call sqr_reduce
+ call sqr_reduce
+
+
+
+ mov rcx,QWORD PTR[48+rsp]
+ mov rax,rcx
+ shr rax,4
+ mov edx,DWORD PTR[64+rax*2+rsp]
+ and rcx,15
+ shr rdx,cl
+ and rdx,01Fh
+
+ lea rsi,QWORD PTR[640+rdx*2+rsp]
+ lea rdx,QWORD PTR[448+rsp]
+ mov rdi,rdx
+ mov rbp,4
+loop_3::
+ movzx rbx,WORD PTR[192+rsi]
+ movzx rax,WORD PTR[448+rsi]
+ shl rbx,16
+ shl rax,16
+ mov bx,WORD PTR[128+rsi]
+ mov ax,WORD PTR[384+rsi]
+ shl rbx,16
+ shl rax,16
+ mov bx,WORD PTR[64+rsi]
+ mov ax,WORD PTR[320+rsi]
+ shl rbx,16
+ shl rax,16
+ mov bx,WORD PTR[rsi]
+ mov ax,WORD PTR[256+rsi]
+ mov QWORD PTR[rdx],rbx
+ mov QWORD PTR[8+rdx],rax
+ lea rsi,QWORD PTR[512+rsi]
+ lea rdx,QWORD PTR[16+rdx]
+ sub rbp,1
+ jnz loop_3
+ mov rsi,QWORD PTR[8+rsp]
+ call mont_mul_a3b
+
+
+
+ mov rcx,QWORD PTR[48+rsp]
+ sub rcx,5
+ mov QWORD PTR[48+rsp],rcx
+ jge main_loop_a3b
+
+
+
+end_main_loop_a3b::
+
+
+ mov rdx,QWORD PTR[8+rsp]
+ pxor xmm4,xmm4
+ movdqu xmm0,XMMWORD PTR[rdx]
+ movdqu xmm1,XMMWORD PTR[16+rdx]
+ movdqu xmm2,XMMWORD PTR[32+rdx]
+ movdqu xmm3,XMMWORD PTR[48+rdx]
+ movdqa XMMWORD PTR[576+rsp],xmm4
+ movdqa XMMWORD PTR[592+rsp],xmm4
+ movdqa XMMWORD PTR[608+rsp],xmm4
+ movdqa XMMWORD PTR[624+rsp],xmm4
+ movdqa XMMWORD PTR[512+rsp],xmm0
+ movdqa XMMWORD PTR[528+rsp],xmm1
+ movdqa XMMWORD PTR[544+rsp],xmm2
+ movdqa XMMWORD PTR[560+rsp],xmm3
+ call mont_reduce
+
+
+
+ mov rax,QWORD PTR[8+rsp]
+ mov r8,QWORD PTR[rax]
+ mov r9,QWORD PTR[8+rax]
+ mov r10,QWORD PTR[16+rax]
+ mov r11,QWORD PTR[24+rax]
+ mov r12,QWORD PTR[32+rax]
+ mov r13,QWORD PTR[40+rax]
+ mov r14,QWORD PTR[48+rax]
+ mov r15,QWORD PTR[56+rax]
+
+
+ mov rbx,QWORD PTR[24+rsp]
+ add rbx,512
+
+ sub r8,QWORD PTR[rbx]
+ sbb r9,QWORD PTR[8+rbx]
+ sbb r10,QWORD PTR[16+rbx]
+ sbb r11,QWORD PTR[24+rbx]
+ sbb r12,QWORD PTR[32+rbx]
+ sbb r13,QWORD PTR[40+rbx]
+ sbb r14,QWORD PTR[48+rbx]
+ sbb r15,QWORD PTR[56+rbx]
+
+
+ mov rsi,QWORD PTR[rax]
+ mov rdi,QWORD PTR[8+rax]
+ mov rcx,QWORD PTR[16+rax]
+ mov rdx,QWORD PTR[24+rax]
+ cmovnc rsi,r8
+ cmovnc rdi,r9
+ cmovnc rcx,r10
+ cmovnc rdx,r11
+ mov QWORD PTR[rax],rsi
+ mov QWORD PTR[8+rax],rdi
+ mov QWORD PTR[16+rax],rcx
+ mov QWORD PTR[24+rax],rdx
+
+ mov rsi,QWORD PTR[32+rax]
+ mov rdi,QWORD PTR[40+rax]
+ mov rcx,QWORD PTR[48+rax]
+ mov rdx,QWORD PTR[56+rax]
+ cmovnc rsi,r12
+ cmovnc rdi,r13
+ cmovnc rcx,r14
+ cmovnc rdx,r15
+ mov QWORD PTR[32+rax],rsi
+ mov QWORD PTR[40+rax],rdi
+ mov QWORD PTR[48+rax],rcx
+ mov QWORD PTR[56+rax],rdx
+
+ mov rsi,QWORD PTR[rsp]
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbx,QWORD PTR[32+rsi]
+ mov rbp,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_mod_exp_512::
+mod_exp_512 ENDP
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+mod_exp_512_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ lea r10,QWORD PTR[$L$body]
+ cmp rbx,r10
+ jb $L$in_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ lea r10,QWORD PTR[$L$epilogue]
+ cmp rbx,r10
+ jae $L$in_prologue
+
+ mov rax,QWORD PTR[rax]
+
+ mov rbx,QWORD PTR[32+rax]
+ mov rbp,QWORD PTR[40+rax]
+ mov r12,QWORD PTR[24+rax]
+ mov r13,QWORD PTR[16+rax]
+ mov r14,QWORD PTR[8+rax]
+ mov r15,QWORD PTR[rax]
+ lea rax,QWORD PTR[48+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+$L$in_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+mod_exp_512_se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_mod_exp_512
+ DD imagerel $L$SEH_end_mod_exp_512
+ DD imagerel $L$SEH_info_mod_exp_512
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_mod_exp_512::
+DB 9,0,0,0
+ DD imagerel mod_exp_512_se_handler
+
+.xdata ENDS
+END
diff --git a/win-x86_64/crypto/bn/rsaz-avx2.asm b/win-x86_64/crypto/bn/rsaz-avx2.asm
new file mode 100644
index 0000000..f9188f5
--- /dev/null
+++ b/win-x86_64/crypto/bn/rsaz-avx2.asm
@@ -0,0 +1,29 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+
+PUBLIC rsaz_avx2_eligible
+
+rsaz_avx2_eligible PROC PUBLIC
+ xor eax,eax
+ DB 0F3h,0C3h ;repret
+rsaz_avx2_eligible ENDP
+
+PUBLIC rsaz_1024_sqr_avx2
+PUBLIC rsaz_1024_mul_avx2
+PUBLIC rsaz_1024_norm2red_avx2
+PUBLIC rsaz_1024_red2norm_avx2
+PUBLIC rsaz_1024_scatter5_avx2
+PUBLIC rsaz_1024_gather5_avx2
+
+rsaz_1024_sqr_avx2 PROC PUBLIC
+rsaz_1024_mul_avx2::
+rsaz_1024_norm2red_avx2::
+rsaz_1024_red2norm_avx2::
+rsaz_1024_scatter5_avx2::
+rsaz_1024_gather5_avx2::
+DB 00fh,00bh
+ DB 0F3h,0C3h ;repret
+rsaz_1024_sqr_avx2 ENDP
+
+.text$ ENDS
+END
diff --git a/win-x86_64/crypto/bn/rsaz-x86_64.asm b/win-x86_64/crypto/bn/rsaz-x86_64.asm
new file mode 100644
index 0000000..86e828d
--- /dev/null
+++ b/win-x86_64/crypto/bn/rsaz-x86_64.asm
@@ -0,0 +1,1326 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+
+EXTERN OPENSSL_ia32cap_P:NEAR
+
+PUBLIC rsaz_512_sqr
+
+ALIGN 32
+rsaz_512_sqr PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_rsaz_512_sqr::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+
+ sub rsp,128+24
+$L$sqr_body::
+ mov rbp,rdx
+ mov rdx,QWORD PTR[rsi]
+ mov rax,QWORD PTR[8+rsi]
+ mov QWORD PTR[128+rsp],rcx
+ jmp $L$oop_sqr
+
+ALIGN 32
+$L$oop_sqr::
+ mov DWORD PTR[((128+8))+rsp],r8d
+
+ mov rbx,rdx
+ mul rdx
+ mov r8,rax
+ mov rax,QWORD PTR[16+rsi]
+ mov r9,rdx
+
+ mul rbx
+ add r9,rax
+ mov rax,QWORD PTR[24+rsi]
+ mov r10,rdx
+ adc r10,0
+
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[32+rsi]
+ mov r11,rdx
+ adc r11,0
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[40+rsi]
+ mov r12,rdx
+ adc r12,0
+
+ mul rbx
+ add r12,rax
+ mov rax,QWORD PTR[48+rsi]
+ mov r13,rdx
+ adc r13,0
+
+ mul rbx
+ add r13,rax
+ mov rax,QWORD PTR[56+rsi]
+ mov r14,rdx
+ adc r14,0
+
+ mul rbx
+ add r14,rax
+ mov rax,rbx
+ mov r15,rdx
+ adc r15,0
+
+ add r8,r8
+ mov rcx,r9
+ adc r9,r9
+
+ mul rax
+ mov QWORD PTR[rsp],rax
+ add r8,rdx
+ adc r9,0
+
+ mov QWORD PTR[8+rsp],r8
+ shr rcx,63
+
+
+ mov r8,QWORD PTR[8+rsi]
+ mov rax,QWORD PTR[16+rsi]
+ mul r8
+ add r10,rax
+ mov rax,QWORD PTR[24+rsi]
+ mov rbx,rdx
+ adc rbx,0
+
+ mul r8
+ add r11,rax
+ mov rax,QWORD PTR[32+rsi]
+ adc rdx,0
+ add r11,rbx
+ mov rbx,rdx
+ adc rbx,0
+
+ mul r8
+ add r12,rax
+ mov rax,QWORD PTR[40+rsi]
+ adc rdx,0
+ add r12,rbx
+ mov rbx,rdx
+ adc rbx,0
+
+ mul r8
+ add r13,rax
+ mov rax,QWORD PTR[48+rsi]
+ adc rdx,0
+ add r13,rbx
+ mov rbx,rdx
+ adc rbx,0
+
+ mul r8
+ add r14,rax
+ mov rax,QWORD PTR[56+rsi]
+ adc rdx,0
+ add r14,rbx
+ mov rbx,rdx
+ adc rbx,0
+
+ mul r8
+ add r15,rax
+ mov rax,r8
+ adc rdx,0
+ add r15,rbx
+ mov r8,rdx
+ mov rdx,r10
+ adc r8,0
+
+ add rdx,rdx
+ lea r10,QWORD PTR[r10*2+rcx]
+ mov rbx,r11
+ adc r11,r11
+
+ mul rax
+ add r9,rax
+ adc r10,rdx
+ adc r11,0
+
+ mov QWORD PTR[16+rsp],r9
+ mov QWORD PTR[24+rsp],r10
+ shr rbx,63
+
+
+ mov r9,QWORD PTR[16+rsi]
+ mov rax,QWORD PTR[24+rsi]
+ mul r9
+ add r12,rax
+ mov rax,QWORD PTR[32+rsi]
+ mov rcx,rdx
+ adc rcx,0
+
+ mul r9
+ add r13,rax
+ mov rax,QWORD PTR[40+rsi]
+ adc rdx,0
+ add r13,rcx
+ mov rcx,rdx
+ adc rcx,0
+
+ mul r9
+ add r14,rax
+ mov rax,QWORD PTR[48+rsi]
+ adc rdx,0
+ add r14,rcx
+ mov rcx,rdx
+ adc rcx,0
+
+ mul r9
+ mov r10,r12
+ lea r12,QWORD PTR[r12*2+rbx]
+ add r15,rax
+ mov rax,QWORD PTR[56+rsi]
+ adc rdx,0
+ add r15,rcx
+ mov rcx,rdx
+ adc rcx,0
+
+ mul r9
+ shr r10,63
+ add r8,rax
+ mov rax,r9
+ adc rdx,0
+ add r8,rcx
+ mov r9,rdx
+ adc r9,0
+
+ mov rcx,r13
+ lea r13,QWORD PTR[r13*2+r10]
+
+ mul rax
+ add r11,rax
+ adc r12,rdx
+ adc r13,0
+
+ mov QWORD PTR[32+rsp],r11
+ mov QWORD PTR[40+rsp],r12
+ shr rcx,63
+
+
+ mov r10,QWORD PTR[24+rsi]
+ mov rax,QWORD PTR[32+rsi]
+ mul r10
+ add r14,rax
+ mov rax,QWORD PTR[40+rsi]
+ mov rbx,rdx
+ adc rbx,0
+
+ mul r10
+ add r15,rax
+ mov rax,QWORD PTR[48+rsi]
+ adc rdx,0
+ add r15,rbx
+ mov rbx,rdx
+ adc rbx,0
+
+ mul r10
+ mov r12,r14
+ lea r14,QWORD PTR[r14*2+rcx]
+ add r8,rax
+ mov rax,QWORD PTR[56+rsi]
+ adc rdx,0
+ add r8,rbx
+ mov rbx,rdx
+ adc rbx,0
+
+ mul r10
+ shr r12,63
+ add r9,rax
+ mov rax,r10
+ adc rdx,0
+ add r9,rbx
+ mov r10,rdx
+ adc r10,0
+
+ mov rbx,r15
+ lea r15,QWORD PTR[r15*2+r12]
+
+ mul rax
+ add r13,rax
+ adc r14,rdx
+ adc r15,0
+
+ mov QWORD PTR[48+rsp],r13
+ mov QWORD PTR[56+rsp],r14
+ shr rbx,63
+
+
+ mov r11,QWORD PTR[32+rsi]
+ mov rax,QWORD PTR[40+rsi]
+ mul r11
+ add r8,rax
+ mov rax,QWORD PTR[48+rsi]
+ mov rcx,rdx
+ adc rcx,0
+
+ mul r11
+ add r9,rax
+ mov rax,QWORD PTR[56+rsi]
+ adc rdx,0
+ mov r12,r8
+ lea r8,QWORD PTR[r8*2+rbx]
+ add r9,rcx
+ mov rcx,rdx
+ adc rcx,0
+
+ mul r11
+ shr r12,63
+ add r10,rax
+ mov rax,r11
+ adc rdx,0
+ add r10,rcx
+ mov r11,rdx
+ adc r11,0
+
+ mov rcx,r9
+ lea r9,QWORD PTR[r9*2+r12]
+
+ mul rax
+ add r15,rax
+ adc r8,rdx
+ adc r9,0
+
+ mov QWORD PTR[64+rsp],r15
+ mov QWORD PTR[72+rsp],r8
+ shr rcx,63
+
+
+ mov r12,QWORD PTR[40+rsi]
+ mov rax,QWORD PTR[48+rsi]
+ mul r12
+ add r10,rax
+ mov rax,QWORD PTR[56+rsi]
+ mov rbx,rdx
+ adc rbx,0
+
+ mul r12
+ add r11,rax
+ mov rax,r12
+ mov r15,r10
+ lea r10,QWORD PTR[r10*2+rcx]
+ adc rdx,0
+ shr r15,63
+ add r11,rbx
+ mov r12,rdx
+ adc r12,0
+
+ mov rbx,r11
+ lea r11,QWORD PTR[r11*2+r15]
+
+ mul rax
+ add r9,rax
+ adc r10,rdx
+ adc r11,0
+
+ mov QWORD PTR[80+rsp],r9
+ mov QWORD PTR[88+rsp],r10
+
+
+ mov r13,QWORD PTR[48+rsi]
+ mov rax,QWORD PTR[56+rsi]
+ mul r13
+ add r12,rax
+ mov rax,r13
+ mov r13,rdx
+ adc r13,0
+
+ xor r14,r14
+ shl rbx,1
+ adc r12,r12
+ adc r13,r13
+ adc r14,r14
+
+ mul rax
+ add r11,rax
+ adc r12,rdx
+ adc r13,0
+
+ mov QWORD PTR[96+rsp],r11
+ mov QWORD PTR[104+rsp],r12
+
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rax
+ add r13,rax
+ adc rdx,0
+
+ add r14,rdx
+
+ mov QWORD PTR[112+rsp],r13
+ mov QWORD PTR[120+rsp],r14
+
+ mov r8,QWORD PTR[rsp]
+ mov r9,QWORD PTR[8+rsp]
+ mov r10,QWORD PTR[16+rsp]
+ mov r11,QWORD PTR[24+rsp]
+ mov r12,QWORD PTR[32+rsp]
+ mov r13,QWORD PTR[40+rsp]
+ mov r14,QWORD PTR[48+rsp]
+ mov r15,QWORD PTR[56+rsp]
+
+ call __rsaz_512_reduce
+
+ add r8,QWORD PTR[64+rsp]
+ adc r9,QWORD PTR[72+rsp]
+ adc r10,QWORD PTR[80+rsp]
+ adc r11,QWORD PTR[88+rsp]
+ adc r12,QWORD PTR[96+rsp]
+ adc r13,QWORD PTR[104+rsp]
+ adc r14,QWORD PTR[112+rsp]
+ adc r15,QWORD PTR[120+rsp]
+ sbb rcx,rcx
+
+ call __rsaz_512_subtract
+
+ mov rdx,r8
+ mov rax,r9
+ mov r8d,DWORD PTR[((128+8))+rsp]
+ mov rsi,rdi
+
+ dec r8d
+ jnz $L$oop_sqr
+
+ lea rax,QWORD PTR[((128+24+48))+rsp]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov rbx,QWORD PTR[((-8))+rax]
+ lea rsp,QWORD PTR[rax]
+$L$sqr_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_rsaz_512_sqr::
+rsaz_512_sqr ENDP
+PUBLIC rsaz_512_mul
+
+ALIGN 32
+rsaz_512_mul PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_rsaz_512_mul::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+
+ sub rsp,128+24
+$L$mul_body::
+DB 102,72,15,110,199
+DB 102,72,15,110,201
+ mov QWORD PTR[128+rsp],r8
+ mov rbx,QWORD PTR[rdx]
+ mov rbp,rdx
+ call __rsaz_512_mul
+
+DB 102,72,15,126,199
+DB 102,72,15,126,205
+
+ mov r8,QWORD PTR[rsp]
+ mov r9,QWORD PTR[8+rsp]
+ mov r10,QWORD PTR[16+rsp]
+ mov r11,QWORD PTR[24+rsp]
+ mov r12,QWORD PTR[32+rsp]
+ mov r13,QWORD PTR[40+rsp]
+ mov r14,QWORD PTR[48+rsp]
+ mov r15,QWORD PTR[56+rsp]
+
+ call __rsaz_512_reduce
+ add r8,QWORD PTR[64+rsp]
+ adc r9,QWORD PTR[72+rsp]
+ adc r10,QWORD PTR[80+rsp]
+ adc r11,QWORD PTR[88+rsp]
+ adc r12,QWORD PTR[96+rsp]
+ adc r13,QWORD PTR[104+rsp]
+ adc r14,QWORD PTR[112+rsp]
+ adc r15,QWORD PTR[120+rsp]
+ sbb rcx,rcx
+
+ call __rsaz_512_subtract
+
+ lea rax,QWORD PTR[((128+24+48))+rsp]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov rbx,QWORD PTR[((-8))+rax]
+ lea rsp,QWORD PTR[rax]
+$L$mul_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_rsaz_512_mul::
+rsaz_512_mul ENDP
+PUBLIC rsaz_512_mul_gather4
+
+ALIGN 32
+rsaz_512_mul_gather4 PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_rsaz_512_mul_gather4::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+
+ mov r9d,r9d
+ sub rsp,128+24
+$L$mul_gather4_body::
+ mov eax,DWORD PTR[64+r9*4+rdx]
+DB 102,72,15,110,199
+ mov ebx,DWORD PTR[r9*4+rdx]
+DB 102,72,15,110,201
+ mov QWORD PTR[128+rsp],r8
+
+ shl rax,32
+ or rbx,rax
+ mov rax,QWORD PTR[rsi]
+ mov rcx,QWORD PTR[8+rsi]
+ lea rbp,QWORD PTR[128+r9*4+rdx]
+ mul rbx
+ mov QWORD PTR[rsp],rax
+ mov rax,rcx
+ mov r8,rdx
+
+ mul rbx
+ movd xmm4,DWORD PTR[rbp]
+ add r8,rax
+ mov rax,QWORD PTR[16+rsi]
+ mov r9,rdx
+ adc r9,0
+
+ mul rbx
+ movd xmm5,DWORD PTR[64+rbp]
+ add r9,rax
+ mov rax,QWORD PTR[24+rsi]
+ mov r10,rdx
+ adc r10,0
+
+ mul rbx
+ pslldq xmm5,4
+ add r10,rax
+ mov rax,QWORD PTR[32+rsi]
+ mov r11,rdx
+ adc r11,0
+
+ mul rbx
+ por xmm4,xmm5
+ add r11,rax
+ mov rax,QWORD PTR[40+rsi]
+ mov r12,rdx
+ adc r12,0
+
+ mul rbx
+ add r12,rax
+ mov rax,QWORD PTR[48+rsi]
+ mov r13,rdx
+ adc r13,0
+
+ mul rbx
+ lea rbp,QWORD PTR[128+rbp]
+ add r13,rax
+ mov rax,QWORD PTR[56+rsi]
+ mov r14,rdx
+ adc r14,0
+
+ mul rbx
+DB 102,72,15,126,227
+ add r14,rax
+ mov rax,QWORD PTR[rsi]
+ mov r15,rdx
+ adc r15,0
+
+ lea rdi,QWORD PTR[8+rsp]
+ mov ecx,7
+ jmp $L$oop_mul_gather
+
+ALIGN 32
+$L$oop_mul_gather::
+ mul rbx
+ add r8,rax
+ mov rax,QWORD PTR[8+rsi]
+ mov QWORD PTR[rdi],r8
+ mov r8,rdx
+ adc r8,0
+
+ mul rbx
+ movd xmm4,DWORD PTR[rbp]
+ add r9,rax
+ mov rax,QWORD PTR[16+rsi]
+ adc rdx,0
+ add r8,r9
+ mov r9,rdx
+ adc r9,0
+
+ mul rbx
+ movd xmm5,DWORD PTR[64+rbp]
+ add r10,rax
+ mov rax,QWORD PTR[24+rsi]
+ adc rdx,0
+ add r9,r10
+ mov r10,rdx
+ adc r10,0
+
+ mul rbx
+ pslldq xmm5,4
+ add r11,rax
+ mov rax,QWORD PTR[32+rsi]
+ adc rdx,0
+ add r10,r11
+ mov r11,rdx
+ adc r11,0
+
+ mul rbx
+ por xmm4,xmm5
+ add r12,rax
+ mov rax,QWORD PTR[40+rsi]
+ adc rdx,0
+ add r11,r12
+ mov r12,rdx
+ adc r12,0
+
+ mul rbx
+ add r13,rax
+ mov rax,QWORD PTR[48+rsi]
+ adc rdx,0
+ add r12,r13
+ mov r13,rdx
+ adc r13,0
+
+ mul rbx
+ add r14,rax
+ mov rax,QWORD PTR[56+rsi]
+ adc rdx,0
+ add r13,r14
+ mov r14,rdx
+ adc r14,0
+
+ mul rbx
+DB 102,72,15,126,227
+ add r15,rax
+ mov rax,QWORD PTR[rsi]
+ adc rdx,0
+ add r14,r15
+ mov r15,rdx
+ adc r15,0
+
+ lea rbp,QWORD PTR[128+rbp]
+ lea rdi,QWORD PTR[8+rdi]
+
+ dec ecx
+ jnz $L$oop_mul_gather
+
+ mov QWORD PTR[rdi],r8
+ mov QWORD PTR[8+rdi],r9
+ mov QWORD PTR[16+rdi],r10
+ mov QWORD PTR[24+rdi],r11
+ mov QWORD PTR[32+rdi],r12
+ mov QWORD PTR[40+rdi],r13
+ mov QWORD PTR[48+rdi],r14
+ mov QWORD PTR[56+rdi],r15
+
+DB 102,72,15,126,199
+DB 102,72,15,126,205
+
+ mov r8,QWORD PTR[rsp]
+ mov r9,QWORD PTR[8+rsp]
+ mov r10,QWORD PTR[16+rsp]
+ mov r11,QWORD PTR[24+rsp]
+ mov r12,QWORD PTR[32+rsp]
+ mov r13,QWORD PTR[40+rsp]
+ mov r14,QWORD PTR[48+rsp]
+ mov r15,QWORD PTR[56+rsp]
+
+ call __rsaz_512_reduce
+ add r8,QWORD PTR[64+rsp]
+ adc r9,QWORD PTR[72+rsp]
+ adc r10,QWORD PTR[80+rsp]
+ adc r11,QWORD PTR[88+rsp]
+ adc r12,QWORD PTR[96+rsp]
+ adc r13,QWORD PTR[104+rsp]
+ adc r14,QWORD PTR[112+rsp]
+ adc r15,QWORD PTR[120+rsp]
+ sbb rcx,rcx
+
+ call __rsaz_512_subtract
+
+ lea rax,QWORD PTR[((128+24+48))+rsp]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov rbx,QWORD PTR[((-8))+rax]
+ lea rsp,QWORD PTR[rax]
+$L$mul_gather4_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_rsaz_512_mul_gather4::
+rsaz_512_mul_gather4 ENDP
+PUBLIC rsaz_512_mul_scatter4
+
+ALIGN 32
+rsaz_512_mul_scatter4 PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_rsaz_512_mul_scatter4::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+
+ mov r9d,r9d
+ sub rsp,128+24
+$L$mul_scatter4_body::
+ lea r8,QWORD PTR[r9*4+r8]
+DB 102,72,15,110,199
+DB 102,72,15,110,202
+DB 102,73,15,110,208
+ mov QWORD PTR[128+rsp],rcx
+
+ mov rbp,rdi
+ mov rbx,QWORD PTR[rdi]
+ call __rsaz_512_mul
+
+DB 102,72,15,126,199
+DB 102,72,15,126,205
+
+ mov r8,QWORD PTR[rsp]
+ mov r9,QWORD PTR[8+rsp]
+ mov r10,QWORD PTR[16+rsp]
+ mov r11,QWORD PTR[24+rsp]
+ mov r12,QWORD PTR[32+rsp]
+ mov r13,QWORD PTR[40+rsp]
+ mov r14,QWORD PTR[48+rsp]
+ mov r15,QWORD PTR[56+rsp]
+
+ call __rsaz_512_reduce
+ add r8,QWORD PTR[64+rsp]
+ adc r9,QWORD PTR[72+rsp]
+ adc r10,QWORD PTR[80+rsp]
+ adc r11,QWORD PTR[88+rsp]
+ adc r12,QWORD PTR[96+rsp]
+ adc r13,QWORD PTR[104+rsp]
+ adc r14,QWORD PTR[112+rsp]
+ adc r15,QWORD PTR[120+rsp]
+DB 102,72,15,126,214
+ sbb rcx,rcx
+
+ call __rsaz_512_subtract
+
+ mov DWORD PTR[rsi],r8d
+ shr r8,32
+ mov DWORD PTR[128+rsi],r9d
+ shr r9,32
+ mov DWORD PTR[256+rsi],r10d
+ shr r10,32
+ mov DWORD PTR[384+rsi],r11d
+ shr r11,32
+ mov DWORD PTR[512+rsi],r12d
+ shr r12,32
+ mov DWORD PTR[640+rsi],r13d
+ shr r13,32
+ mov DWORD PTR[768+rsi],r14d
+ shr r14,32
+ mov DWORD PTR[896+rsi],r15d
+ shr r15,32
+ mov DWORD PTR[64+rsi],r8d
+ mov DWORD PTR[192+rsi],r9d
+ mov DWORD PTR[320+rsi],r10d
+ mov DWORD PTR[448+rsi],r11d
+ mov DWORD PTR[576+rsi],r12d
+ mov DWORD PTR[704+rsi],r13d
+ mov DWORD PTR[832+rsi],r14d
+ mov DWORD PTR[960+rsi],r15d
+
+ lea rax,QWORD PTR[((128+24+48))+rsp]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov rbx,QWORD PTR[((-8))+rax]
+ lea rsp,QWORD PTR[rax]
+$L$mul_scatter4_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_rsaz_512_mul_scatter4::
+rsaz_512_mul_scatter4 ENDP
+PUBLIC rsaz_512_mul_by_one
+
+ALIGN 32
+rsaz_512_mul_by_one PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_rsaz_512_mul_by_one::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+
+ sub rsp,128+24
+$L$mul_by_one_body::
+ mov rbp,rdx
+ mov QWORD PTR[128+rsp],rcx
+
+ mov r8,QWORD PTR[rsi]
+ pxor xmm0,xmm0
+ mov r9,QWORD PTR[8+rsi]
+ mov r10,QWORD PTR[16+rsi]
+ mov r11,QWORD PTR[24+rsi]
+ mov r12,QWORD PTR[32+rsi]
+ mov r13,QWORD PTR[40+rsi]
+ mov r14,QWORD PTR[48+rsi]
+ mov r15,QWORD PTR[56+rsi]
+
+ movdqa XMMWORD PTR[rsp],xmm0
+ movdqa XMMWORD PTR[16+rsp],xmm0
+ movdqa XMMWORD PTR[32+rsp],xmm0
+ movdqa XMMWORD PTR[48+rsp],xmm0
+ movdqa XMMWORD PTR[64+rsp],xmm0
+ movdqa XMMWORD PTR[80+rsp],xmm0
+ movdqa XMMWORD PTR[96+rsp],xmm0
+ call __rsaz_512_reduce
+ mov QWORD PTR[rdi],r8
+ mov QWORD PTR[8+rdi],r9
+ mov QWORD PTR[16+rdi],r10
+ mov QWORD PTR[24+rdi],r11
+ mov QWORD PTR[32+rdi],r12
+ mov QWORD PTR[40+rdi],r13
+ mov QWORD PTR[48+rdi],r14
+ mov QWORD PTR[56+rdi],r15
+
+ lea rax,QWORD PTR[((128+24+48))+rsp]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov rbx,QWORD PTR[((-8))+rax]
+ lea rsp,QWORD PTR[rax]
+$L$mul_by_one_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_rsaz_512_mul_by_one::
+rsaz_512_mul_by_one ENDP
+
+ALIGN 32
+__rsaz_512_reduce PROC PRIVATE
+ mov rbx,r8
+ imul rbx,QWORD PTR[((128+8))+rsp]
+ mov rax,QWORD PTR[rbp]
+ mov ecx,8
+ jmp $L$reduction_loop
+
+ALIGN 32
+$L$reduction_loop::
+ mul rbx
+ mov rax,QWORD PTR[8+rbp]
+ neg r8
+ mov r8,rdx
+ adc r8,0
+
+ mul rbx
+ add r9,rax
+ mov rax,QWORD PTR[16+rbp]
+ adc rdx,0
+ add r8,r9
+ mov r9,rdx
+ adc r9,0
+
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[24+rbp]
+ adc rdx,0
+ add r9,r10
+ mov r10,rdx
+ adc r10,0
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[32+rbp]
+ adc rdx,0
+ add r10,r11
+ mov rsi,QWORD PTR[((128+8))+rsp]
+
+
+ adc rdx,0
+ mov r11,rdx
+
+ mul rbx
+ add r12,rax
+ mov rax,QWORD PTR[40+rbp]
+ adc rdx,0
+ imul rsi,r8
+ add r11,r12
+ mov r12,rdx
+ adc r12,0
+
+ mul rbx
+ add r13,rax
+ mov rax,QWORD PTR[48+rbp]
+ adc rdx,0
+ add r12,r13
+ mov r13,rdx
+ adc r13,0
+
+ mul rbx
+ add r14,rax
+ mov rax,QWORD PTR[56+rbp]
+ adc rdx,0
+ add r13,r14
+ mov r14,rdx
+ adc r14,0
+
+ mul rbx
+ mov rbx,rsi
+ add r15,rax
+ mov rax,QWORD PTR[rbp]
+ adc rdx,0
+ add r14,r15
+ mov r15,rdx
+ adc r15,0
+
+ dec ecx
+ jne $L$reduction_loop
+
+ DB 0F3h,0C3h ;repret
+__rsaz_512_reduce ENDP
+
+ALIGN 32
+__rsaz_512_subtract PROC PRIVATE
+ mov QWORD PTR[rdi],r8
+ mov QWORD PTR[8+rdi],r9
+ mov QWORD PTR[16+rdi],r10
+ mov QWORD PTR[24+rdi],r11
+ mov QWORD PTR[32+rdi],r12
+ mov QWORD PTR[40+rdi],r13
+ mov QWORD PTR[48+rdi],r14
+ mov QWORD PTR[56+rdi],r15
+
+ mov r8,QWORD PTR[rbp]
+ mov r9,QWORD PTR[8+rbp]
+ neg r8
+ not r9
+ and r8,rcx
+ mov r10,QWORD PTR[16+rbp]
+ and r9,rcx
+ not r10
+ mov r11,QWORD PTR[24+rbp]
+ and r10,rcx
+ not r11
+ mov r12,QWORD PTR[32+rbp]
+ and r11,rcx
+ not r12
+ mov r13,QWORD PTR[40+rbp]
+ and r12,rcx
+ not r13
+ mov r14,QWORD PTR[48+rbp]
+ and r13,rcx
+ not r14
+ mov r15,QWORD PTR[56+rbp]
+ and r14,rcx
+ not r15
+ and r15,rcx
+
+ add r8,QWORD PTR[rdi]
+ adc r9,QWORD PTR[8+rdi]
+ adc r10,QWORD PTR[16+rdi]
+ adc r11,QWORD PTR[24+rdi]
+ adc r12,QWORD PTR[32+rdi]
+ adc r13,QWORD PTR[40+rdi]
+ adc r14,QWORD PTR[48+rdi]
+ adc r15,QWORD PTR[56+rdi]
+
+ mov QWORD PTR[rdi],r8
+ mov QWORD PTR[8+rdi],r9
+ mov QWORD PTR[16+rdi],r10
+ mov QWORD PTR[24+rdi],r11
+ mov QWORD PTR[32+rdi],r12
+ mov QWORD PTR[40+rdi],r13
+ mov QWORD PTR[48+rdi],r14
+ mov QWORD PTR[56+rdi],r15
+
+ DB 0F3h,0C3h ;repret
+__rsaz_512_subtract ENDP
+
+ALIGN 32
+__rsaz_512_mul PROC PRIVATE
+ lea rdi,QWORD PTR[8+rsp]
+
+ mov rax,QWORD PTR[rsi]
+ mul rbx
+ mov QWORD PTR[rdi],rax
+ mov rax,QWORD PTR[8+rsi]
+ mov r8,rdx
+
+ mul rbx
+ add r8,rax
+ mov rax,QWORD PTR[16+rsi]
+ mov r9,rdx
+ adc r9,0
+
+ mul rbx
+ add r9,rax
+ mov rax,QWORD PTR[24+rsi]
+ mov r10,rdx
+ adc r10,0
+
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[32+rsi]
+ mov r11,rdx
+ adc r11,0
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[40+rsi]
+ mov r12,rdx
+ adc r12,0
+
+ mul rbx
+ add r12,rax
+ mov rax,QWORD PTR[48+rsi]
+ mov r13,rdx
+ adc r13,0
+
+ mul rbx
+ add r13,rax
+ mov rax,QWORD PTR[56+rsi]
+ mov r14,rdx
+ adc r14,0
+
+ mul rbx
+ add r14,rax
+ mov rax,QWORD PTR[rsi]
+ mov r15,rdx
+ adc r15,0
+
+ lea rbp,QWORD PTR[8+rbp]
+ lea rdi,QWORD PTR[8+rdi]
+
+ mov ecx,7
+ jmp $L$oop_mul
+
+ALIGN 32
+$L$oop_mul::
+ mov rbx,QWORD PTR[rbp]
+ mul rbx
+ add r8,rax
+ mov rax,QWORD PTR[8+rsi]
+ mov QWORD PTR[rdi],r8
+ mov r8,rdx
+ adc r8,0
+
+ mul rbx
+ add r9,rax
+ mov rax,QWORD PTR[16+rsi]
+ adc rdx,0
+ add r8,r9
+ mov r9,rdx
+ adc r9,0
+
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[24+rsi]
+ adc rdx,0
+ add r9,r10
+ mov r10,rdx
+ adc r10,0
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[32+rsi]
+ adc rdx,0
+ add r10,r11
+ mov r11,rdx
+ adc r11,0
+
+ mul rbx
+ add r12,rax
+ mov rax,QWORD PTR[40+rsi]
+ adc rdx,0
+ add r11,r12
+ mov r12,rdx
+ adc r12,0
+
+ mul rbx
+ add r13,rax
+ mov rax,QWORD PTR[48+rsi]
+ adc rdx,0
+ add r12,r13
+ mov r13,rdx
+ adc r13,0
+
+ mul rbx
+ add r14,rax
+ mov rax,QWORD PTR[56+rsi]
+ adc rdx,0
+ add r13,r14
+ mov r14,rdx
+ lea rbp,QWORD PTR[8+rbp]
+ adc r14,0
+
+ mul rbx
+ add r15,rax
+ mov rax,QWORD PTR[rsi]
+ adc rdx,0
+ add r14,r15
+ mov r15,rdx
+ adc r15,0
+
+ lea rdi,QWORD PTR[8+rdi]
+
+ dec ecx
+ jnz $L$oop_mul
+
+ mov QWORD PTR[rdi],r8
+ mov QWORD PTR[8+rdi],r9
+ mov QWORD PTR[16+rdi],r10
+ mov QWORD PTR[24+rdi],r11
+ mov QWORD PTR[32+rdi],r12
+ mov QWORD PTR[40+rdi],r13
+ mov QWORD PTR[48+rdi],r14
+ mov QWORD PTR[56+rdi],r15
+
+ DB 0F3h,0C3h ;repret
+__rsaz_512_mul ENDP
+PUBLIC rsaz_512_scatter4
+
+ALIGN 16
+rsaz_512_scatter4 PROC PUBLIC
+ lea rcx,QWORD PTR[r8*4+rcx]
+ mov r9d,8
+ jmp $L$oop_scatter
+ALIGN 16
+$L$oop_scatter::
+ mov rax,QWORD PTR[rdx]
+ lea rdx,QWORD PTR[8+rdx]
+ mov DWORD PTR[rcx],eax
+ shr rax,32
+ mov DWORD PTR[64+rcx],eax
+ lea rcx,QWORD PTR[128+rcx]
+ dec r9d
+ jnz $L$oop_scatter
+ DB 0F3h,0C3h ;repret
+rsaz_512_scatter4 ENDP
+
+PUBLIC rsaz_512_gather4
+
+ALIGN 16
+rsaz_512_gather4 PROC PUBLIC
+ lea rdx,QWORD PTR[r8*4+rdx]
+ mov r9d,8
+ jmp $L$oop_gather
+ALIGN 16
+$L$oop_gather::
+ mov eax,DWORD PTR[rdx]
+ mov r8d,DWORD PTR[64+rdx]
+ lea rdx,QWORD PTR[128+rdx]
+ shl r8,32
+ or rax,r8
+ mov QWORD PTR[rcx],rax
+ lea rcx,QWORD PTR[8+rcx]
+ dec r9d
+ jnz $L$oop_gather
+ DB 0F3h,0C3h ;repret
+rsaz_512_gather4 ENDP
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_seh_tail
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+
+ lea rax,QWORD PTR[((128+24+48))+rax]
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+$L$common_seh_tail::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_rsaz_512_sqr
+ DD imagerel $L$SEH_end_rsaz_512_sqr
+ DD imagerel $L$SEH_info_rsaz_512_sqr
+
+ DD imagerel $L$SEH_begin_rsaz_512_mul
+ DD imagerel $L$SEH_end_rsaz_512_mul
+ DD imagerel $L$SEH_info_rsaz_512_mul
+
+ DD imagerel $L$SEH_begin_rsaz_512_mul_gather4
+ DD imagerel $L$SEH_end_rsaz_512_mul_gather4
+ DD imagerel $L$SEH_info_rsaz_512_mul_gather4
+
+ DD imagerel $L$SEH_begin_rsaz_512_mul_scatter4
+ DD imagerel $L$SEH_end_rsaz_512_mul_scatter4
+ DD imagerel $L$SEH_info_rsaz_512_mul_scatter4
+
+ DD imagerel $L$SEH_begin_rsaz_512_mul_by_one
+ DD imagerel $L$SEH_end_rsaz_512_mul_by_one
+ DD imagerel $L$SEH_info_rsaz_512_mul_by_one
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_rsaz_512_sqr::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$sqr_body,imagerel $L$sqr_epilogue
+$L$SEH_info_rsaz_512_mul::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$mul_body,imagerel $L$mul_epilogue
+$L$SEH_info_rsaz_512_mul_gather4::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$mul_gather4_body,imagerel $L$mul_gather4_epilogue
+$L$SEH_info_rsaz_512_mul_scatter4::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$mul_scatter4_body,imagerel $L$mul_scatter4_epilogue
+$L$SEH_info_rsaz_512_mul_by_one::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$mul_by_one_body,imagerel $L$mul_by_one_epilogue
+
+.xdata ENDS
+END
diff --git a/win-x86_64/crypto/bn/x86_64-mont.asm b/win-x86_64/crypto/bn/x86_64-mont.asm
new file mode 100644
index 0000000..a409325
--- /dev/null
+++ b/win-x86_64/crypto/bn/x86_64-mont.asm
@@ -0,0 +1,945 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+
+EXTERN OPENSSL_ia32cap_P:NEAR
+
+PUBLIC bn_mul_mont
+
+ALIGN 16
+bn_mul_mont PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_bn_mul_mont::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ test r9d,3
+ jnz $L$mul_enter
+ cmp r9d,8
+ jb $L$mul_enter
+ cmp rdx,rsi
+ jne $L$mul4x_enter
+ test r9d,7
+ jz $L$sqr8x_enter
+ jmp $L$mul4x_enter
+
+ALIGN 16
+$L$mul_enter::
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+
+ mov r9d,r9d
+ lea r10,QWORD PTR[2+r9]
+ mov r11,rsp
+ neg r10
+ lea rsp,QWORD PTR[r10*8+rsp]
+ and rsp,-1024
+
+ mov QWORD PTR[8+r9*8+rsp],r11
+$L$mul_body::
+ mov r12,rdx
+ mov r8,QWORD PTR[r8]
+ mov rbx,QWORD PTR[r12]
+ mov rax,QWORD PTR[rsi]
+
+ xor r14,r14
+ xor r15,r15
+
+ mov rbp,r8
+ mul rbx
+ mov r10,rax
+ mov rax,QWORD PTR[rcx]
+
+ imul rbp,r10
+ mov r11,rdx
+
+ mul rbp
+ add r10,rax
+ mov rax,QWORD PTR[8+rsi]
+ adc rdx,0
+ mov r13,rdx
+
+ lea r15,QWORD PTR[1+r15]
+ jmp $L$1st_enter
+
+ALIGN 16
+$L$1st::
+ add r13,rax
+ mov rax,QWORD PTR[r15*8+rsi]
+ adc rdx,0
+ add r13,r11
+ mov r11,r10
+ adc rdx,0
+ mov QWORD PTR[((-16))+r15*8+rsp],r13
+ mov r13,rdx
+
+$L$1st_enter::
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[r15*8+rcx]
+ adc rdx,0
+ lea r15,QWORD PTR[1+r15]
+ mov r10,rdx
+
+ mul rbp
+ cmp r15,r9
+ jne $L$1st
+
+ add r13,rax
+ mov rax,QWORD PTR[rsi]
+ adc rdx,0
+ add r13,r11
+ adc rdx,0
+ mov QWORD PTR[((-16))+r15*8+rsp],r13
+ mov r13,rdx
+ mov r11,r10
+
+ xor rdx,rdx
+ add r13,r11
+ adc rdx,0
+ mov QWORD PTR[((-8))+r9*8+rsp],r13
+ mov QWORD PTR[r9*8+rsp],rdx
+
+ lea r14,QWORD PTR[1+r14]
+ jmp $L$outer
+ALIGN 16
+$L$outer::
+ mov rbx,QWORD PTR[r14*8+r12]
+ xor r15,r15
+ mov rbp,r8
+ mov r10,QWORD PTR[rsp]
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[rcx]
+ adc rdx,0
+
+ imul rbp,r10
+ mov r11,rdx
+
+ mul rbp
+ add r10,rax
+ mov rax,QWORD PTR[8+rsi]
+ adc rdx,0
+ mov r10,QWORD PTR[8+rsp]
+ mov r13,rdx
+
+ lea r15,QWORD PTR[1+r15]
+ jmp $L$inner_enter
+
+ALIGN 16
+$L$inner::
+ add r13,rax
+ mov rax,QWORD PTR[r15*8+rsi]
+ adc rdx,0
+ add r13,r10
+ mov r10,QWORD PTR[r15*8+rsp]
+ adc rdx,0
+ mov QWORD PTR[((-16))+r15*8+rsp],r13
+ mov r13,rdx
+
+$L$inner_enter::
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[r15*8+rcx]
+ adc rdx,0
+ add r10,r11
+ mov r11,rdx
+ adc r11,0
+ lea r15,QWORD PTR[1+r15]
+
+ mul rbp
+ cmp r15,r9
+ jne $L$inner
+
+ add r13,rax
+ mov rax,QWORD PTR[rsi]
+ adc rdx,0
+ add r13,r10
+ mov r10,QWORD PTR[r15*8+rsp]
+ adc rdx,0
+ mov QWORD PTR[((-16))+r15*8+rsp],r13
+ mov r13,rdx
+
+ xor rdx,rdx
+ add r13,r11
+ adc rdx,0
+ add r13,r10
+ adc rdx,0
+ mov QWORD PTR[((-8))+r9*8+rsp],r13
+ mov QWORD PTR[r9*8+rsp],rdx
+
+ lea r14,QWORD PTR[1+r14]
+ cmp r14,r9
+ jb $L$outer
+
+ xor r14,r14
+ mov rax,QWORD PTR[rsp]
+ lea rsi,QWORD PTR[rsp]
+ mov r15,r9
+ jmp $L$sub
+ALIGN 16
+$L$sub:: sbb rax,QWORD PTR[r14*8+rcx]
+ mov QWORD PTR[r14*8+rdi],rax
+ mov rax,QWORD PTR[8+r14*8+rsi]
+ lea r14,QWORD PTR[1+r14]
+ dec r15
+ jnz $L$sub
+
+ sbb rax,0
+ xor r14,r14
+ mov r15,r9
+ALIGN 16
+$L$copy::
+ mov rsi,QWORD PTR[r14*8+rsp]
+ mov rcx,QWORD PTR[r14*8+rdi]
+ xor rsi,rcx
+ and rsi,rax
+ xor rsi,rcx
+ mov QWORD PTR[r14*8+rsp],r14
+ mov QWORD PTR[r14*8+rdi],rsi
+ lea r14,QWORD PTR[1+r14]
+ sub r15,1
+ jnz $L$copy
+
+ mov rsi,QWORD PTR[8+r9*8+rsp]
+ mov rax,1
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$mul_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_bn_mul_mont::
+bn_mul_mont ENDP
+
+ALIGN 16
+bn_mul4x_mont PROC PRIVATE
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_bn_mul4x_mont::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+$L$mul4x_enter::
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+
+ mov r9d,r9d
+ lea r10,QWORD PTR[4+r9]
+ mov r11,rsp
+ neg r10
+ lea rsp,QWORD PTR[r10*8+rsp]
+ and rsp,-1024
+
+ mov QWORD PTR[8+r9*8+rsp],r11
+$L$mul4x_body::
+ mov QWORD PTR[16+r9*8+rsp],rdi
+ mov r12,rdx
+ mov r8,QWORD PTR[r8]
+ mov rbx,QWORD PTR[r12]
+ mov rax,QWORD PTR[rsi]
+
+ xor r14,r14
+ xor r15,r15
+
+ mov rbp,r8
+ mul rbx
+ mov r10,rax
+ mov rax,QWORD PTR[rcx]
+
+ imul rbp,r10
+ mov r11,rdx
+
+ mul rbp
+ add r10,rax
+ mov rax,QWORD PTR[8+rsi]
+ adc rdx,0
+ mov rdi,rdx
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[8+rcx]
+ adc rdx,0
+ mov r10,rdx
+
+ mul rbp
+ add rdi,rax
+ mov rax,QWORD PTR[16+rsi]
+ adc rdx,0
+ add rdi,r11
+ lea r15,QWORD PTR[4+r15]
+ adc rdx,0
+ mov QWORD PTR[rsp],rdi
+ mov r13,rdx
+ jmp $L$1st4x
+ALIGN 16
+$L$1st4x::
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[((-16))+r15*8+rcx]
+ adc rdx,0
+ mov r11,rdx
+
+ mul rbp
+ add r13,rax
+ mov rax,QWORD PTR[((-8))+r15*8+rsi]
+ adc rdx,0
+ add r13,r10
+ adc rdx,0
+ mov QWORD PTR[((-24))+r15*8+rsp],r13
+ mov rdi,rdx
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[((-8))+r15*8+rcx]
+ adc rdx,0
+ mov r10,rdx
+
+ mul rbp
+ add rdi,rax
+ mov rax,QWORD PTR[r15*8+rsi]
+ adc rdx,0
+ add rdi,r11
+ adc rdx,0
+ mov QWORD PTR[((-16))+r15*8+rsp],rdi
+ mov r13,rdx
+
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[r15*8+rcx]
+ adc rdx,0
+ mov r11,rdx
+
+ mul rbp
+ add r13,rax
+ mov rax,QWORD PTR[8+r15*8+rsi]
+ adc rdx,0
+ add r13,r10
+ adc rdx,0
+ mov QWORD PTR[((-8))+r15*8+rsp],r13
+ mov rdi,rdx
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[8+r15*8+rcx]
+ adc rdx,0
+ lea r15,QWORD PTR[4+r15]
+ mov r10,rdx
+
+ mul rbp
+ add rdi,rax
+ mov rax,QWORD PTR[((-16))+r15*8+rsi]
+ adc rdx,0
+ add rdi,r11
+ adc rdx,0
+ mov QWORD PTR[((-32))+r15*8+rsp],rdi
+ mov r13,rdx
+ cmp r15,r9
+ jb $L$1st4x
+
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[((-16))+r15*8+rcx]
+ adc rdx,0
+ mov r11,rdx
+
+ mul rbp
+ add r13,rax
+ mov rax,QWORD PTR[((-8))+r15*8+rsi]
+ adc rdx,0
+ add r13,r10
+ adc rdx,0
+ mov QWORD PTR[((-24))+r15*8+rsp],r13
+ mov rdi,rdx
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[((-8))+r15*8+rcx]
+ adc rdx,0
+ mov r10,rdx
+
+ mul rbp
+ add rdi,rax
+ mov rax,QWORD PTR[rsi]
+ adc rdx,0
+ add rdi,r11
+ adc rdx,0
+ mov QWORD PTR[((-16))+r15*8+rsp],rdi
+ mov r13,rdx
+
+ xor rdi,rdi
+ add r13,r10
+ adc rdi,0
+ mov QWORD PTR[((-8))+r15*8+rsp],r13
+ mov QWORD PTR[r15*8+rsp],rdi
+
+ lea r14,QWORD PTR[1+r14]
+ALIGN 4
+$L$outer4x::
+ mov rbx,QWORD PTR[r14*8+r12]
+ xor r15,r15
+ mov r10,QWORD PTR[rsp]
+ mov rbp,r8
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[rcx]
+ adc rdx,0
+
+ imul rbp,r10
+ mov r11,rdx
+
+ mul rbp
+ add r10,rax
+ mov rax,QWORD PTR[8+rsi]
+ adc rdx,0
+ mov rdi,rdx
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[8+rcx]
+ adc rdx,0
+ add r11,QWORD PTR[8+rsp]
+ adc rdx,0
+ mov r10,rdx
+
+ mul rbp
+ add rdi,rax
+ mov rax,QWORD PTR[16+rsi]
+ adc rdx,0
+ add rdi,r11
+ lea r15,QWORD PTR[4+r15]
+ adc rdx,0
+ mov QWORD PTR[rsp],rdi
+ mov r13,rdx
+ jmp $L$inner4x
+ALIGN 16
+$L$inner4x::
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[((-16))+r15*8+rcx]
+ adc rdx,0
+ add r10,QWORD PTR[((-16))+r15*8+rsp]
+ adc rdx,0
+ mov r11,rdx
+
+ mul rbp
+ add r13,rax
+ mov rax,QWORD PTR[((-8))+r15*8+rsi]
+ adc rdx,0
+ add r13,r10
+ adc rdx,0
+ mov QWORD PTR[((-24))+r15*8+rsp],r13
+ mov rdi,rdx
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[((-8))+r15*8+rcx]
+ adc rdx,0
+ add r11,QWORD PTR[((-8))+r15*8+rsp]
+ adc rdx,0
+ mov r10,rdx
+
+ mul rbp
+ add rdi,rax
+ mov rax,QWORD PTR[r15*8+rsi]
+ adc rdx,0
+ add rdi,r11
+ adc rdx,0
+ mov QWORD PTR[((-16))+r15*8+rsp],rdi
+ mov r13,rdx
+
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[r15*8+rcx]
+ adc rdx,0
+ add r10,QWORD PTR[r15*8+rsp]
+ adc rdx,0
+ mov r11,rdx
+
+ mul rbp
+ add r13,rax
+ mov rax,QWORD PTR[8+r15*8+rsi]
+ adc rdx,0
+ add r13,r10
+ adc rdx,0
+ mov QWORD PTR[((-8))+r15*8+rsp],r13
+ mov rdi,rdx
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[8+r15*8+rcx]
+ adc rdx,0
+ add r11,QWORD PTR[8+r15*8+rsp]
+ adc rdx,0
+ lea r15,QWORD PTR[4+r15]
+ mov r10,rdx
+
+ mul rbp
+ add rdi,rax
+ mov rax,QWORD PTR[((-16))+r15*8+rsi]
+ adc rdx,0
+ add rdi,r11
+ adc rdx,0
+ mov QWORD PTR[((-32))+r15*8+rsp],rdi
+ mov r13,rdx
+ cmp r15,r9
+ jb $L$inner4x
+
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[((-16))+r15*8+rcx]
+ adc rdx,0
+ add r10,QWORD PTR[((-16))+r15*8+rsp]
+ adc rdx,0
+ mov r11,rdx
+
+ mul rbp
+ add r13,rax
+ mov rax,QWORD PTR[((-8))+r15*8+rsi]
+ adc rdx,0
+ add r13,r10
+ adc rdx,0
+ mov QWORD PTR[((-24))+r15*8+rsp],r13
+ mov rdi,rdx
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[((-8))+r15*8+rcx]
+ adc rdx,0
+ add r11,QWORD PTR[((-8))+r15*8+rsp]
+ adc rdx,0
+ lea r14,QWORD PTR[1+r14]
+ mov r10,rdx
+
+ mul rbp
+ add rdi,rax
+ mov rax,QWORD PTR[rsi]
+ adc rdx,0
+ add rdi,r11
+ adc rdx,0
+ mov QWORD PTR[((-16))+r15*8+rsp],rdi
+ mov r13,rdx
+
+ xor rdi,rdi
+ add r13,r10
+ adc rdi,0
+ add r13,QWORD PTR[r9*8+rsp]
+ adc rdi,0
+ mov QWORD PTR[((-8))+r15*8+rsp],r13
+ mov QWORD PTR[r15*8+rsp],rdi
+
+ cmp r14,r9
+ jb $L$outer4x
+ mov rdi,QWORD PTR[16+r9*8+rsp]
+ mov rax,QWORD PTR[rsp]
+ mov rdx,QWORD PTR[8+rsp]
+ shr r9,2
+ lea rsi,QWORD PTR[rsp]
+ xor r14,r14
+
+ sub rax,QWORD PTR[rcx]
+ mov rbx,QWORD PTR[16+rsi]
+ mov rbp,QWORD PTR[24+rsi]
+ sbb rdx,QWORD PTR[8+rcx]
+ lea r15,QWORD PTR[((-1))+r9]
+ jmp $L$sub4x
+ALIGN 16
+$L$sub4x::
+ mov QWORD PTR[r14*8+rdi],rax
+ mov QWORD PTR[8+r14*8+rdi],rdx
+ sbb rbx,QWORD PTR[16+r14*8+rcx]
+ mov rax,QWORD PTR[32+r14*8+rsi]
+ mov rdx,QWORD PTR[40+r14*8+rsi]
+ sbb rbp,QWORD PTR[24+r14*8+rcx]
+ mov QWORD PTR[16+r14*8+rdi],rbx
+ mov QWORD PTR[24+r14*8+rdi],rbp
+ sbb rax,QWORD PTR[32+r14*8+rcx]
+ mov rbx,QWORD PTR[48+r14*8+rsi]
+ mov rbp,QWORD PTR[56+r14*8+rsi]
+ sbb rdx,QWORD PTR[40+r14*8+rcx]
+ lea r14,QWORD PTR[4+r14]
+ dec r15
+ jnz $L$sub4x
+
+ mov QWORD PTR[r14*8+rdi],rax
+ mov rax,QWORD PTR[32+r14*8+rsi]
+ sbb rbx,QWORD PTR[16+r14*8+rcx]
+ mov QWORD PTR[8+r14*8+rdi],rdx
+ sbb rbp,QWORD PTR[24+r14*8+rcx]
+ mov QWORD PTR[16+r14*8+rdi],rbx
+
+ sbb rax,0
+DB 66h, 48h, 0fh, 6eh, 0c0h
+ punpcklqdq xmm0,xmm0
+ mov QWORD PTR[24+r14*8+rdi],rbp
+ xor r14,r14
+
+ mov r15,r9
+ pxor xmm5,xmm5
+ jmp $L$copy4x
+ALIGN 16
+$L$copy4x::
+ movdqu xmm2,XMMWORD PTR[r14*1+rsp]
+ movdqu xmm4,XMMWORD PTR[16+r14*1+rsp]
+ movdqu xmm1,XMMWORD PTR[r14*1+rdi]
+ movdqu xmm3,XMMWORD PTR[16+r14*1+rdi]
+ pxor xmm2,xmm1
+ pxor xmm4,xmm3
+ pand xmm2,xmm0
+ pand xmm4,xmm0
+ pxor xmm2,xmm1
+ pxor xmm4,xmm3
+ movdqu XMMWORD PTR[r14*1+rdi],xmm2
+ movdqu XMMWORD PTR[16+r14*1+rdi],xmm4
+ movdqa XMMWORD PTR[r14*1+rsp],xmm5
+ movdqa XMMWORD PTR[16+r14*1+rsp],xmm5
+
+ lea r14,QWORD PTR[32+r14]
+ dec r15
+ jnz $L$copy4x
+
+ shl r9,2
+ mov rsi,QWORD PTR[8+r9*8+rsp]
+ mov rax,1
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$mul4x_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_bn_mul4x_mont::
+bn_mul4x_mont ENDP
+EXTERN bn_sqr8x_internal:NEAR
+
+
+ALIGN 32
+bn_sqr8x_mont PROC PRIVATE
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_bn_sqr8x_mont::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+$L$sqr8x_enter::
+ mov rax,rsp
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+
+ mov r10d,r9d
+ shl r9d,3
+ shl r10,3+2
+ neg r9
+
+
+
+
+
+
+ lea r11,QWORD PTR[((-64))+r9*4+rsp]
+ mov r8,QWORD PTR[r8]
+ sub r11,rsi
+ and r11,4095
+ cmp r10,r11
+ jb $L$sqr8x_sp_alt
+ sub rsp,r11
+ lea rsp,QWORD PTR[((-64))+r9*4+rsp]
+ jmp $L$sqr8x_sp_done
+
+ALIGN 32
+$L$sqr8x_sp_alt::
+ lea r10,QWORD PTR[((4096-64))+r9*4]
+ lea rsp,QWORD PTR[((-64))+r9*4+rsp]
+ sub r11,r10
+ mov r10,0
+ cmovc r11,r10
+ sub rsp,r11
+$L$sqr8x_sp_done::
+ and rsp,-64
+ mov r10,r9
+ neg r9
+
+ lea r11,QWORD PTR[64+r9*2+rsp]
+ mov QWORD PTR[32+rsp],r8
+ mov QWORD PTR[40+rsp],rax
+$L$sqr8x_body::
+
+ mov rbp,r9
+DB 102,73,15,110,211
+ shr rbp,3+2
+ mov eax,DWORD PTR[((OPENSSL_ia32cap_P+8))]
+ jmp $L$sqr8x_copy_n
+
+ALIGN 32
+$L$sqr8x_copy_n::
+ movq xmm0,QWORD PTR[rcx]
+ movq xmm1,QWORD PTR[8+rcx]
+ movq xmm3,QWORD PTR[16+rcx]
+ movq xmm4,QWORD PTR[24+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ movdqa XMMWORD PTR[r11],xmm0
+ movdqa XMMWORD PTR[16+r11],xmm1
+ movdqa XMMWORD PTR[32+r11],xmm3
+ movdqa XMMWORD PTR[48+r11],xmm4
+ lea r11,QWORD PTR[64+r11]
+ dec rbp
+ jnz $L$sqr8x_copy_n
+
+ pxor xmm0,xmm0
+DB 102,72,15,110,207
+DB 102,73,15,110,218
+ call bn_sqr8x_internal
+
+ pxor xmm0,xmm0
+ lea rax,QWORD PTR[48+rsp]
+ lea rdx,QWORD PTR[64+r9*2+rsp]
+ shr r9,3+2
+ mov rsi,QWORD PTR[40+rsp]
+ jmp $L$sqr8x_zero
+
+ALIGN 32
+$L$sqr8x_zero::
+ movdqa XMMWORD PTR[rax],xmm0
+ movdqa XMMWORD PTR[16+rax],xmm0
+ movdqa XMMWORD PTR[32+rax],xmm0
+ movdqa XMMWORD PTR[48+rax],xmm0
+ lea rax,QWORD PTR[64+rax]
+ movdqa XMMWORD PTR[rdx],xmm0
+ movdqa XMMWORD PTR[16+rdx],xmm0
+ movdqa XMMWORD PTR[32+rdx],xmm0
+ movdqa XMMWORD PTR[48+rdx],xmm0
+ lea rdx,QWORD PTR[64+rdx]
+ dec r9
+ jnz $L$sqr8x_zero
+
+ mov rax,1
+ mov r15,QWORD PTR[((-48))+rsi]
+ mov r14,QWORD PTR[((-40))+rsi]
+ mov r13,QWORD PTR[((-32))+rsi]
+ mov r12,QWORD PTR[((-24))+rsi]
+ mov rbp,QWORD PTR[((-16))+rsi]
+ mov rbx,QWORD PTR[((-8))+rsi]
+ lea rsp,QWORD PTR[rsi]
+$L$sqr8x_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_bn_sqr8x_mont::
+bn_sqr8x_mont ENDP
+DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
+DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
+DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83
+DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+DB 115,108,46,111,114,103,62,0
+ALIGN 16
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+mul_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_seh_tail
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+
+ mov r10,QWORD PTR[192+r8]
+ mov rax,QWORD PTR[8+r10*8+rax]
+ lea rax,QWORD PTR[48+rax]
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+ jmp $L$common_seh_tail
+mul_handler ENDP
+
+
+ALIGN 16
+sqr_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_seh_tail
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+
+ mov rax,QWORD PTR[40+rax]
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+$L$common_seh_tail::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+sqr_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_bn_mul_mont
+ DD imagerel $L$SEH_end_bn_mul_mont
+ DD imagerel $L$SEH_info_bn_mul_mont
+
+ DD imagerel $L$SEH_begin_bn_mul4x_mont
+ DD imagerel $L$SEH_end_bn_mul4x_mont
+ DD imagerel $L$SEH_info_bn_mul4x_mont
+
+ DD imagerel $L$SEH_begin_bn_sqr8x_mont
+ DD imagerel $L$SEH_end_bn_sqr8x_mont
+ DD imagerel $L$SEH_info_bn_sqr8x_mont
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_bn_mul_mont::
+DB 9,0,0,0
+ DD imagerel mul_handler
+ DD imagerel $L$mul_body,imagerel $L$mul_epilogue
+$L$SEH_info_bn_mul4x_mont::
+DB 9,0,0,0
+ DD imagerel mul_handler
+ DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
+$L$SEH_info_bn_sqr8x_mont::
+DB 9,0,0,0
+ DD imagerel sqr_handler
+ DD imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue
+
+.xdata ENDS
+END
diff --git a/win-x86_64/crypto/bn/x86_64-mont5.asm b/win-x86_64/crypto/bn/x86_64-mont5.asm
new file mode 100644
index 0000000..90c6100
--- /dev/null
+++ b/win-x86_64/crypto/bn/x86_64-mont5.asm
@@ -0,0 +1,2061 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+
+EXTERN OPENSSL_ia32cap_P:NEAR
+
+PUBLIC bn_mul_mont_gather5
+
+ALIGN 64
+bn_mul_mont_gather5 PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_bn_mul_mont_gather5::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ test r9d,7
+ jnz $L$mul_enter
+ jmp $L$mul4x_enter
+
+ALIGN 16
+$L$mul_enter::
+ mov r9d,r9d
+ mov rax,rsp
+ mov r10d,DWORD PTR[56+rsp]
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-40))+rsp]
+ movaps XMMWORD PTR[rsp],xmm6
+ movaps XMMWORD PTR[16+rsp],xmm7
+ lea r11,QWORD PTR[2+r9]
+ neg r11
+ lea rsp,QWORD PTR[r11*8+rsp]
+ and rsp,-1024
+
+ mov QWORD PTR[8+r9*8+rsp],rax
+$L$mul_body::
+ mov r12,rdx
+ mov r11,r10
+ shr r10,3
+ and r11,7
+ not r10
+ lea rax,QWORD PTR[$L$magic_masks]
+ and r10,3
+ lea r12,QWORD PTR[96+r11*8+r12]
+ movq xmm4,QWORD PTR[r10*8+rax]
+ movq xmm5,QWORD PTR[8+r10*8+rax]
+ movq xmm6,QWORD PTR[16+r10*8+rax]
+ movq xmm7,QWORD PTR[24+r10*8+rax]
+
+ movq xmm0,QWORD PTR[(((-96)))+r12]
+ movq xmm1,QWORD PTR[((-32))+r12]
+ pand xmm0,xmm4
+ movq xmm2,QWORD PTR[32+r12]
+ pand xmm1,xmm5
+ movq xmm3,QWORD PTR[96+r12]
+ pand xmm2,xmm6
+ por xmm0,xmm1
+ pand xmm3,xmm7
+ por xmm0,xmm2
+ lea r12,QWORD PTR[256+r12]
+ por xmm0,xmm3
+
+DB 102,72,15,126,195
+
+ mov r8,QWORD PTR[r8]
+ mov rax,QWORD PTR[rsi]
+
+ xor r14,r14
+ xor r15,r15
+
+ movq xmm0,QWORD PTR[(((-96)))+r12]
+ movq xmm1,QWORD PTR[((-32))+r12]
+ pand xmm0,xmm4
+ movq xmm2,QWORD PTR[32+r12]
+ pand xmm1,xmm5
+
+ mov rbp,r8
+ mul rbx
+ mov r10,rax
+ mov rax,QWORD PTR[rcx]
+
+ movq xmm3,QWORD PTR[96+r12]
+ pand xmm2,xmm6
+ por xmm0,xmm1
+ pand xmm3,xmm7
+
+ imul rbp,r10
+ mov r11,rdx
+
+ por xmm0,xmm2
+ lea r12,QWORD PTR[256+r12]
+ por xmm0,xmm3
+
+ mul rbp
+ add r10,rax
+ mov rax,QWORD PTR[8+rsi]
+ adc rdx,0
+ mov r13,rdx
+
+ lea r15,QWORD PTR[1+r15]
+ jmp $L$1st_enter
+
+ALIGN 16
+$L$1st::
+ add r13,rax
+ mov rax,QWORD PTR[r15*8+rsi]
+ adc rdx,0
+ add r13,r11
+ mov r11,r10
+ adc rdx,0
+ mov QWORD PTR[((-16))+r15*8+rsp],r13
+ mov r13,rdx
+
+$L$1st_enter::
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[r15*8+rcx]
+ adc rdx,0
+ lea r15,QWORD PTR[1+r15]
+ mov r10,rdx
+
+ mul rbp
+ cmp r15,r9
+ jne $L$1st
+
+DB 102,72,15,126,195
+
+ add r13,rax
+ mov rax,QWORD PTR[rsi]
+ adc rdx,0
+ add r13,r11
+ adc rdx,0
+ mov QWORD PTR[((-16))+r15*8+rsp],r13
+ mov r13,rdx
+ mov r11,r10
+
+ xor rdx,rdx
+ add r13,r11
+ adc rdx,0
+ mov QWORD PTR[((-8))+r9*8+rsp],r13
+ mov QWORD PTR[r9*8+rsp],rdx
+
+ lea r14,QWORD PTR[1+r14]
+ jmp $L$outer
+ALIGN 16
+$L$outer::
+ xor r15,r15
+ mov rbp,r8
+ mov r10,QWORD PTR[rsp]
+
+ movq xmm0,QWORD PTR[(((-96)))+r12]
+ movq xmm1,QWORD PTR[((-32))+r12]
+ pand xmm0,xmm4
+ movq xmm2,QWORD PTR[32+r12]
+ pand xmm1,xmm5
+
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[rcx]
+ adc rdx,0
+
+ movq xmm3,QWORD PTR[96+r12]
+ pand xmm2,xmm6
+ por xmm0,xmm1
+ pand xmm3,xmm7
+
+ imul rbp,r10
+ mov r11,rdx
+
+ por xmm0,xmm2
+ lea r12,QWORD PTR[256+r12]
+ por xmm0,xmm3
+
+ mul rbp
+ add r10,rax
+ mov rax,QWORD PTR[8+rsi]
+ adc rdx,0
+ mov r10,QWORD PTR[8+rsp]
+ mov r13,rdx
+
+ lea r15,QWORD PTR[1+r15]
+ jmp $L$inner_enter
+
+ALIGN 16
+$L$inner::
+ add r13,rax
+ mov rax,QWORD PTR[r15*8+rsi]
+ adc rdx,0
+ add r13,r10
+ mov r10,QWORD PTR[r15*8+rsp]
+ adc rdx,0
+ mov QWORD PTR[((-16))+r15*8+rsp],r13
+ mov r13,rdx
+
+$L$inner_enter::
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[r15*8+rcx]
+ adc rdx,0
+ add r10,r11
+ mov r11,rdx
+ adc r11,0
+ lea r15,QWORD PTR[1+r15]
+
+ mul rbp
+ cmp r15,r9
+ jne $L$inner
+
+DB 102,72,15,126,195
+
+ add r13,rax
+ mov rax,QWORD PTR[rsi]
+ adc rdx,0
+ add r13,r10
+ mov r10,QWORD PTR[r15*8+rsp]
+ adc rdx,0
+ mov QWORD PTR[((-16))+r15*8+rsp],r13
+ mov r13,rdx
+
+ xor rdx,rdx
+ add r13,r11
+ adc rdx,0
+ add r13,r10
+ adc rdx,0
+ mov QWORD PTR[((-8))+r9*8+rsp],r13
+ mov QWORD PTR[r9*8+rsp],rdx
+
+ lea r14,QWORD PTR[1+r14]
+ cmp r14,r9
+ jb $L$outer
+
+ xor r14,r14
+ mov rax,QWORD PTR[rsp]
+ lea rsi,QWORD PTR[rsp]
+ mov r15,r9
+ jmp $L$sub
+ALIGN 16
+$L$sub:: sbb rax,QWORD PTR[r14*8+rcx]
+ mov QWORD PTR[r14*8+rdi],rax
+ mov rax,QWORD PTR[8+r14*8+rsi]
+ lea r14,QWORD PTR[1+r14]
+ dec r15
+ jnz $L$sub
+
+ sbb rax,0
+ xor r14,r14
+ mov r15,r9
+ALIGN 16
+$L$copy::
+ mov rsi,QWORD PTR[r14*8+rsp]
+ mov rcx,QWORD PTR[r14*8+rdi]
+ xor rsi,rcx
+ and rsi,rax
+ xor rsi,rcx
+ mov QWORD PTR[r14*8+rsp],r14
+ mov QWORD PTR[r14*8+rdi],rsi
+ lea r14,QWORD PTR[1+r14]
+ sub r15,1
+ jnz $L$copy
+
+ mov rsi,QWORD PTR[8+r9*8+rsp]
+ mov rax,1
+ movaps xmm6,XMMWORD PTR[((-88))+rsi]
+ movaps xmm7,XMMWORD PTR[((-72))+rsi]
+ mov r15,QWORD PTR[((-48))+rsi]
+ mov r14,QWORD PTR[((-40))+rsi]
+ mov r13,QWORD PTR[((-32))+rsi]
+ mov r12,QWORD PTR[((-24))+rsi]
+ mov rbp,QWORD PTR[((-16))+rsi]
+ mov rbx,QWORD PTR[((-8))+rsi]
+ lea rsp,QWORD PTR[rsi]
+$L$mul_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_bn_mul_mont_gather5::
+bn_mul_mont_gather5 ENDP
+
+ALIGN 32
+bn_mul4x_mont_gather5 PROC PRIVATE
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_bn_mul4x_mont_gather5::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+$L$mul4x_enter::
+DB 067h
+ mov rax,rsp
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-40))+rsp]
+ movaps XMMWORD PTR[rsp],xmm6
+ movaps XMMWORD PTR[16+rsp],xmm7
+DB 067h
+ mov r10d,r9d
+ shl r9d,3
+ shl r10d,3+2
+ neg r9
+
+
+
+
+
+
+
+
+ lea r11,QWORD PTR[((-64))+r9*2+rsp]
+ sub r11,rsi
+ and r11,4095
+ cmp r10,r11
+ jb $L$mul4xsp_alt
+ sub rsp,r11
+ lea rsp,QWORD PTR[((-64))+r9*2+rsp]
+ jmp $L$mul4xsp_done
+
+ALIGN 32
+$L$mul4xsp_alt::
+ lea r10,QWORD PTR[((4096-64))+r9*2]
+ lea rsp,QWORD PTR[((-64))+r9*2+rsp]
+ sub r11,r10
+ mov r10,0
+ cmovc r11,r10
+ sub rsp,r11
+$L$mul4xsp_done::
+ and rsp,-64
+ neg r9
+
+ mov QWORD PTR[40+rsp],rax
+$L$mul4x_body::
+
+ call mul4x_internal
+
+ mov rsi,QWORD PTR[40+rsp]
+ mov rax,1
+ movaps xmm6,XMMWORD PTR[((-88))+rsi]
+ movaps xmm7,XMMWORD PTR[((-72))+rsi]
+ mov r15,QWORD PTR[((-48))+rsi]
+ mov r14,QWORD PTR[((-40))+rsi]
+ mov r13,QWORD PTR[((-32))+rsi]
+ mov r12,QWORD PTR[((-24))+rsi]
+ mov rbp,QWORD PTR[((-16))+rsi]
+ mov rbx,QWORD PTR[((-8))+rsi]
+ lea rsp,QWORD PTR[rsi]
+$L$mul4x_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_bn_mul4x_mont_gather5::
+bn_mul4x_mont_gather5 ENDP
+
+
+ALIGN 32
+mul4x_internal PROC PRIVATE
+ shl r9,5
+ mov r10d,DWORD PTR[56+rax]
+ lea r13,QWORD PTR[256+r9*1+rdx]
+ shr r9,5
+ mov r11,r10
+ shr r10,3
+ and r11,7
+ not r10
+ lea rax,QWORD PTR[$L$magic_masks]
+ and r10,3
+ lea r12,QWORD PTR[96+r11*8+rdx]
+ movq xmm4,QWORD PTR[r10*8+rax]
+ movq xmm5,QWORD PTR[8+r10*8+rax]
+ add r11,7
+ movq xmm6,QWORD PTR[16+r10*8+rax]
+ movq xmm7,QWORD PTR[24+r10*8+rax]
+ and r11,7
+
+ movq xmm0,QWORD PTR[(((-96)))+r12]
+ lea r14,QWORD PTR[256+r12]
+ movq xmm1,QWORD PTR[((-32))+r12]
+ pand xmm0,xmm4
+ movq xmm2,QWORD PTR[32+r12]
+ pand xmm1,xmm5
+ movq xmm3,QWORD PTR[96+r12]
+ pand xmm2,xmm6
+DB 067h
+ por xmm0,xmm1
+ movq xmm1,QWORD PTR[((-96))+r14]
+DB 067h
+ pand xmm3,xmm7
+DB 067h
+ por xmm0,xmm2
+ movq xmm2,QWORD PTR[((-32))+r14]
+DB 067h
+ pand xmm1,xmm4
+DB 067h
+ por xmm0,xmm3
+ movq xmm3,QWORD PTR[32+r14]
+
+DB 102,72,15,126,195
+ movq xmm0,QWORD PTR[96+r14]
+ mov QWORD PTR[((16+8))+rsp],r13
+ mov QWORD PTR[((56+8))+rsp],rdi
+
+ mov r8,QWORD PTR[r8]
+ mov rax,QWORD PTR[rsi]
+ lea rsi,QWORD PTR[r9*1+rsi]
+ neg r9
+
+ mov rbp,r8
+ mul rbx
+ mov r10,rax
+ mov rax,QWORD PTR[rcx]
+
+ pand xmm2,xmm5
+ pand xmm3,xmm6
+ por xmm1,xmm2
+
+ imul rbp,r10
+
+
+
+
+
+
+
+ lea r14,QWORD PTR[((64+8))+r11*8+rsp]
+ mov r11,rdx
+
+ pand xmm0,xmm7
+ por xmm1,xmm3
+ lea r12,QWORD PTR[512+r12]
+ por xmm0,xmm1
+
+ mul rbp
+ add r10,rax
+ mov rax,QWORD PTR[8+r9*1+rsi]
+ adc rdx,0
+ mov rdi,rdx
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[16+rcx]
+ adc rdx,0
+ mov r10,rdx
+
+ mul rbp
+ add rdi,rax
+ mov rax,QWORD PTR[16+r9*1+rsi]
+ adc rdx,0
+ add rdi,r11
+ lea r15,QWORD PTR[32+r9]
+ lea rcx,QWORD PTR[64+rcx]
+ adc rdx,0
+ mov QWORD PTR[r14],rdi
+ mov r13,rdx
+ jmp $L$1st4x
+
+ALIGN 32
+$L$1st4x::
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[((-32))+rcx]
+ lea r14,QWORD PTR[32+r14]
+ adc rdx,0
+ mov r11,rdx
+
+ mul rbp
+ add r13,rax
+ mov rax,QWORD PTR[((-8))+r15*1+rsi]
+ adc rdx,0
+ add r13,r10
+ adc rdx,0
+ mov QWORD PTR[((-24))+r14],r13
+ mov rdi,rdx
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[((-16))+rcx]
+ adc rdx,0
+ mov r10,rdx
+
+ mul rbp
+ add rdi,rax
+ mov rax,QWORD PTR[r15*1+rsi]
+ adc rdx,0
+ add rdi,r11
+ adc rdx,0
+ mov QWORD PTR[((-16))+r14],rdi
+ mov r13,rdx
+
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[rcx]
+ adc rdx,0
+ mov r11,rdx
+
+ mul rbp
+ add r13,rax
+ mov rax,QWORD PTR[8+r15*1+rsi]
+ adc rdx,0
+ add r13,r10
+ adc rdx,0
+ mov QWORD PTR[((-8))+r14],r13
+ mov rdi,rdx
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[16+rcx]
+ adc rdx,0
+ mov r10,rdx
+
+ mul rbp
+ add rdi,rax
+ mov rax,QWORD PTR[16+r15*1+rsi]
+ adc rdx,0
+ add rdi,r11
+ lea rcx,QWORD PTR[64+rcx]
+ adc rdx,0
+ mov QWORD PTR[r14],rdi
+ mov r13,rdx
+
+ add r15,32
+ jnz $L$1st4x
+
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[((-32))+rcx]
+ lea r14,QWORD PTR[32+r14]
+ adc rdx,0
+ mov r11,rdx
+
+ mul rbp
+ add r13,rax
+ mov rax,QWORD PTR[((-8))+rsi]
+ adc rdx,0
+ add r13,r10
+ adc rdx,0
+ mov QWORD PTR[((-24))+r14],r13
+ mov rdi,rdx
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[((-16))+rcx]
+ adc rdx,0
+ mov r10,rdx
+
+ mul rbp
+ add rdi,rax
+ mov rax,QWORD PTR[r9*1+rsi]
+ adc rdx,0
+ add rdi,r11
+ adc rdx,0
+ mov QWORD PTR[((-16))+r14],rdi
+ mov r13,rdx
+
+DB 102,72,15,126,195
+ lea rcx,QWORD PTR[r9*2+rcx]
+
+ xor rdi,rdi
+ add r13,r10
+ adc rdi,0
+ mov QWORD PTR[((-8))+r14],r13
+
+ jmp $L$outer4x
+
+ALIGN 32
+$L$outer4x::
+ mov r10,QWORD PTR[r9*1+r14]
+ mov rbp,r8
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[rcx]
+ adc rdx,0
+
+ movq xmm0,QWORD PTR[(((-96)))+r12]
+ movq xmm1,QWORD PTR[((-32))+r12]
+ pand xmm0,xmm4
+ movq xmm2,QWORD PTR[32+r12]
+ pand xmm1,xmm5
+ movq xmm3,QWORD PTR[96+r12]
+
+ imul rbp,r10
+DB 067h
+ mov r11,rdx
+ mov QWORD PTR[r14],rdi
+
+ pand xmm2,xmm6
+ por xmm0,xmm1
+ pand xmm3,xmm7
+ por xmm0,xmm2
+ lea r14,QWORD PTR[r9*1+r14]
+ lea r12,QWORD PTR[256+r12]
+ por xmm0,xmm3
+
+ mul rbp
+ add r10,rax
+ mov rax,QWORD PTR[8+r9*1+rsi]
+ adc rdx,0
+ mov rdi,rdx
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[16+rcx]
+ adc rdx,0
+ add r11,QWORD PTR[8+r14]
+ adc rdx,0
+ mov r10,rdx
+
+ mul rbp
+ add rdi,rax
+ mov rax,QWORD PTR[16+r9*1+rsi]
+ adc rdx,0
+ add rdi,r11
+ lea r15,QWORD PTR[32+r9]
+ lea rcx,QWORD PTR[64+rcx]
+ adc rdx,0
+ mov r13,rdx
+ jmp $L$inner4x
+
+ALIGN 32
+$L$inner4x::
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[((-32))+rcx]
+ adc rdx,0
+ add r10,QWORD PTR[16+r14]
+ lea r14,QWORD PTR[32+r14]
+ adc rdx,0
+ mov r11,rdx
+
+ mul rbp
+ add r13,rax
+ mov rax,QWORD PTR[((-8))+r15*1+rsi]
+ adc rdx,0
+ add r13,r10
+ adc rdx,0
+ mov QWORD PTR[((-32))+r14],rdi
+ mov rdi,rdx
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[((-16))+rcx]
+ adc rdx,0
+ add r11,QWORD PTR[((-8))+r14]
+ adc rdx,0
+ mov r10,rdx
+
+ mul rbp
+ add rdi,rax
+ mov rax,QWORD PTR[r15*1+rsi]
+ adc rdx,0
+ add rdi,r11
+ adc rdx,0
+ mov QWORD PTR[((-24))+r14],r13
+ mov r13,rdx
+
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[rcx]
+ adc rdx,0
+ add r10,QWORD PTR[r14]
+ adc rdx,0
+ mov r11,rdx
+
+ mul rbp
+ add r13,rax
+ mov rax,QWORD PTR[8+r15*1+rsi]
+ adc rdx,0
+ add r13,r10
+ adc rdx,0
+ mov QWORD PTR[((-16))+r14],rdi
+ mov rdi,rdx
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[16+rcx]
+ adc rdx,0
+ add r11,QWORD PTR[8+r14]
+ adc rdx,0
+ mov r10,rdx
+
+ mul rbp
+ add rdi,rax
+ mov rax,QWORD PTR[16+r15*1+rsi]
+ adc rdx,0
+ add rdi,r11
+ lea rcx,QWORD PTR[64+rcx]
+ adc rdx,0
+ mov QWORD PTR[((-8))+r14],r13
+ mov r13,rdx
+
+ add r15,32
+ jnz $L$inner4x
+
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[((-32))+rcx]
+ adc rdx,0
+ add r10,QWORD PTR[16+r14]
+ lea r14,QWORD PTR[32+r14]
+ adc rdx,0
+ mov r11,rdx
+
+ mul rbp
+ add r13,rax
+ mov rax,QWORD PTR[((-8))+rsi]
+ adc rdx,0
+ add r13,r10
+ adc rdx,0
+ mov QWORD PTR[((-32))+r14],rdi
+ mov rdi,rdx
+
+ mul rbx
+ add r11,rax
+ mov rax,rbp
+ mov rbp,QWORD PTR[((-16))+rcx]
+ adc rdx,0
+ add r11,QWORD PTR[((-8))+r14]
+ adc rdx,0
+ mov r10,rdx
+
+ mul rbp
+ add rdi,rax
+ mov rax,QWORD PTR[r9*1+rsi]
+ adc rdx,0
+ add rdi,r11
+ adc rdx,0
+ mov QWORD PTR[((-24))+r14],r13
+ mov r13,rdx
+
+DB 102,72,15,126,195
+ mov QWORD PTR[((-16))+r14],rdi
+ lea rcx,QWORD PTR[r9*2+rcx]
+
+ xor rdi,rdi
+ add r13,r10
+ adc rdi,0
+ add r13,QWORD PTR[r14]
+ adc rdi,0
+ mov QWORD PTR[((-8))+r14],r13
+
+ cmp r12,QWORD PTR[((16+8))+rsp]
+ jb $L$outer4x
+ sub rbp,r13
+ adc r15,r15
+ or rdi,r15
+ xor rdi,1
+ lea rbx,QWORD PTR[r9*1+r14]
+ lea rbp,QWORD PTR[rdi*8+rcx]
+ mov rcx,r9
+ sar rcx,3+2
+ mov rdi,QWORD PTR[((56+8))+rsp]
+ jmp $L$sqr4x_sub
+mul4x_internal ENDP
+PUBLIC bn_power5
+
+ALIGN 32
+bn_power5 PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_bn_power5::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ mov rax,rsp
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-40))+rsp]
+ movaps XMMWORD PTR[rsp],xmm6
+ movaps XMMWORD PTR[16+rsp],xmm7
+ mov r10d,r9d
+ shl r9d,3
+ shl r10d,3+2
+ neg r9
+ mov r8,QWORD PTR[r8]
+
+
+
+
+
+
+
+ lea r11,QWORD PTR[((-64))+r9*2+rsp]
+ sub r11,rsi
+ and r11,4095
+ cmp r10,r11
+ jb $L$pwr_sp_alt
+ sub rsp,r11
+ lea rsp,QWORD PTR[((-64))+r9*2+rsp]
+ jmp $L$pwr_sp_done
+
+ALIGN 32
+$L$pwr_sp_alt::
+ lea r10,QWORD PTR[((4096-64))+r9*2]
+ lea rsp,QWORD PTR[((-64))+r9*2+rsp]
+ sub r11,r10
+ mov r10,0
+ cmovc r11,r10
+ sub rsp,r11
+$L$pwr_sp_done::
+ and rsp,-64
+ mov r10,r9
+ neg r9
+
+
+
+
+
+
+
+
+
+
+ mov QWORD PTR[32+rsp],r8
+ mov QWORD PTR[40+rsp],rax
+$L$power5_body::
+DB 102,72,15,110,207
+DB 102,72,15,110,209
+DB 102,73,15,110,218
+DB 102,72,15,110,226
+
+ call __bn_sqr8x_internal
+ call __bn_sqr8x_internal
+ call __bn_sqr8x_internal
+ call __bn_sqr8x_internal
+ call __bn_sqr8x_internal
+
+DB 102,72,15,126,209
+DB 102,72,15,126,226
+ mov rdi,rsi
+ mov rax,QWORD PTR[40+rsp]
+ lea r8,QWORD PTR[32+rsp]
+
+ call mul4x_internal
+
+ mov rsi,QWORD PTR[40+rsp]
+ mov rax,1
+ mov r15,QWORD PTR[((-48))+rsi]
+ mov r14,QWORD PTR[((-40))+rsi]
+ mov r13,QWORD PTR[((-32))+rsi]
+ mov r12,QWORD PTR[((-24))+rsi]
+ mov rbp,QWORD PTR[((-16))+rsi]
+ mov rbx,QWORD PTR[((-8))+rsi]
+ lea rsp,QWORD PTR[rsi]
+$L$power5_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_bn_power5::
+bn_power5 ENDP
+
+PUBLIC bn_sqr8x_internal
+
+
+ALIGN 32
+bn_sqr8x_internal PROC PUBLIC
+__bn_sqr8x_internal::
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ lea rbp,QWORD PTR[32+r10]
+ lea rsi,QWORD PTR[r9*1+rsi]
+
+ mov rcx,r9
+
+
+ mov r14,QWORD PTR[((-32))+rbp*1+rsi]
+ lea rdi,QWORD PTR[((48+8))+r9*2+rsp]
+ mov rax,QWORD PTR[((-24))+rbp*1+rsi]
+ lea rdi,QWORD PTR[((-32))+rbp*1+rdi]
+ mov rbx,QWORD PTR[((-16))+rbp*1+rsi]
+ mov r15,rax
+
+ mul r14
+ mov r10,rax
+ mov rax,rbx
+ mov r11,rdx
+ mov QWORD PTR[((-24))+rbp*1+rdi],r10
+
+ mul r14
+ add r11,rax
+ mov rax,rbx
+ adc rdx,0
+ mov QWORD PTR[((-16))+rbp*1+rdi],r11
+ mov r10,rdx
+
+
+ mov rbx,QWORD PTR[((-8))+rbp*1+rsi]
+ mul r15
+ mov r12,rax
+ mov rax,rbx
+ mov r13,rdx
+
+ lea rcx,QWORD PTR[rbp]
+ mul r14
+ add r10,rax
+ mov rax,rbx
+ mov r11,rdx
+ adc r11,0
+ add r10,r12
+ adc r11,0
+ mov QWORD PTR[((-8))+rcx*1+rdi],r10
+ jmp $L$sqr4x_1st
+
+ALIGN 32
+$L$sqr4x_1st::
+ mov rbx,QWORD PTR[rcx*1+rsi]
+ mul r15
+ add r13,rax
+ mov rax,rbx
+ mov r12,rdx
+ adc r12,0
+
+ mul r14
+ add r11,rax
+ mov rax,rbx
+ mov rbx,QWORD PTR[8+rcx*1+rsi]
+ mov r10,rdx
+ adc r10,0
+ add r11,r13
+ adc r10,0
+
+
+ mul r15
+ add r12,rax
+ mov rax,rbx
+ mov QWORD PTR[rcx*1+rdi],r11
+ mov r13,rdx
+ adc r13,0
+
+ mul r14
+ add r10,rax
+ mov rax,rbx
+ mov rbx,QWORD PTR[16+rcx*1+rsi]
+ mov r11,rdx
+ adc r11,0
+ add r10,r12
+ adc r11,0
+
+ mul r15
+ add r13,rax
+ mov rax,rbx
+ mov QWORD PTR[8+rcx*1+rdi],r10
+ mov r12,rdx
+ adc r12,0
+
+ mul r14
+ add r11,rax
+ mov rax,rbx
+ mov rbx,QWORD PTR[24+rcx*1+rsi]
+ mov r10,rdx
+ adc r10,0
+ add r11,r13
+ adc r10,0
+
+
+ mul r15
+ add r12,rax
+ mov rax,rbx
+ mov QWORD PTR[16+rcx*1+rdi],r11
+ mov r13,rdx
+ adc r13,0
+ lea rcx,QWORD PTR[32+rcx]
+
+ mul r14
+ add r10,rax
+ mov rax,rbx
+ mov r11,rdx
+ adc r11,0
+ add r10,r12
+ adc r11,0
+ mov QWORD PTR[((-8))+rcx*1+rdi],r10
+
+ cmp rcx,0
+ jne $L$sqr4x_1st
+
+ mul r15
+ add r13,rax
+ lea rbp,QWORD PTR[16+rbp]
+ adc rdx,0
+ add r13,r11
+ adc rdx,0
+
+ mov QWORD PTR[rdi],r13
+ mov r12,rdx
+ mov QWORD PTR[8+rdi],rdx
+ jmp $L$sqr4x_outer
+
+ALIGN 32
+$L$sqr4x_outer::
+ mov r14,QWORD PTR[((-32))+rbp*1+rsi]
+ lea rdi,QWORD PTR[((48+8))+r9*2+rsp]
+ mov rax,QWORD PTR[((-24))+rbp*1+rsi]
+ lea rdi,QWORD PTR[((-32))+rbp*1+rdi]
+ mov rbx,QWORD PTR[((-16))+rbp*1+rsi]
+ mov r15,rax
+
+ mul r14
+ mov r10,QWORD PTR[((-24))+rbp*1+rdi]
+ add r10,rax
+ mov rax,rbx
+ adc rdx,0
+ mov QWORD PTR[((-24))+rbp*1+rdi],r10
+ mov r11,rdx
+
+ mul r14
+ add r11,rax
+ mov rax,rbx
+ adc rdx,0
+ add r11,QWORD PTR[((-16))+rbp*1+rdi]
+ mov r10,rdx
+ adc r10,0
+ mov QWORD PTR[((-16))+rbp*1+rdi],r11
+
+ xor r12,r12
+
+ mov rbx,QWORD PTR[((-8))+rbp*1+rsi]
+ mul r15
+ add r12,rax
+ mov rax,rbx
+ adc rdx,0
+ add r12,QWORD PTR[((-8))+rbp*1+rdi]
+ mov r13,rdx
+ adc r13,0
+
+ mul r14
+ add r10,rax
+ mov rax,rbx
+ adc rdx,0
+ add r10,r12
+ mov r11,rdx
+ adc r11,0
+ mov QWORD PTR[((-8))+rbp*1+rdi],r10
+
+ lea rcx,QWORD PTR[rbp]
+ jmp $L$sqr4x_inner
+
+ALIGN 32
+$L$sqr4x_inner::
+ mov rbx,QWORD PTR[rcx*1+rsi]
+ mul r15
+ add r13,rax
+ mov rax,rbx
+ mov r12,rdx
+ adc r12,0
+ add r13,QWORD PTR[rcx*1+rdi]
+ adc r12,0
+
+DB 067h
+ mul r14
+ add r11,rax
+ mov rax,rbx
+ mov rbx,QWORD PTR[8+rcx*1+rsi]
+ mov r10,rdx
+ adc r10,0
+ add r11,r13
+ adc r10,0
+
+ mul r15
+ add r12,rax
+ mov QWORD PTR[rcx*1+rdi],r11
+ mov rax,rbx
+ mov r13,rdx
+ adc r13,0
+ add r12,QWORD PTR[8+rcx*1+rdi]
+ lea rcx,QWORD PTR[16+rcx]
+ adc r13,0
+
+ mul r14
+ add r10,rax
+ mov rax,rbx
+ adc rdx,0
+ add r10,r12
+ mov r11,rdx
+ adc r11,0
+ mov QWORD PTR[((-8))+rcx*1+rdi],r10
+
+ cmp rcx,0
+ jne $L$sqr4x_inner
+
+DB 067h
+ mul r15
+ add r13,rax
+ adc rdx,0
+ add r13,r11
+ adc rdx,0
+
+ mov QWORD PTR[rdi],r13
+ mov r12,rdx
+ mov QWORD PTR[8+rdi],rdx
+
+ add rbp,16
+ jnz $L$sqr4x_outer
+
+
+ mov r14,QWORD PTR[((-32))+rsi]
+ lea rdi,QWORD PTR[((48+8))+r9*2+rsp]
+ mov rax,QWORD PTR[((-24))+rsi]
+ lea rdi,QWORD PTR[((-32))+rbp*1+rdi]
+ mov rbx,QWORD PTR[((-16))+rsi]
+ mov r15,rax
+
+ mul r14
+ add r10,rax
+ mov rax,rbx
+ mov r11,rdx
+ adc r11,0
+
+ mul r14
+ add r11,rax
+ mov rax,rbx
+ mov QWORD PTR[((-24))+rdi],r10
+ mov r10,rdx
+ adc r10,0
+ add r11,r13
+ mov rbx,QWORD PTR[((-8))+rsi]
+ adc r10,0
+
+ mul r15
+ add r12,rax
+ mov rax,rbx
+ mov QWORD PTR[((-16))+rdi],r11
+ mov r13,rdx
+ adc r13,0
+
+ mul r14
+ add r10,rax
+ mov rax,rbx
+ mov r11,rdx
+ adc r11,0
+ add r10,r12
+ adc r11,0
+ mov QWORD PTR[((-8))+rdi],r10
+
+ mul r15
+ add r13,rax
+ mov rax,QWORD PTR[((-16))+rsi]
+ adc rdx,0
+ add r13,r11
+ adc rdx,0
+
+ mov QWORD PTR[rdi],r13
+ mov r12,rdx
+ mov QWORD PTR[8+rdi],rdx
+
+ mul rbx
+ add rbp,16
+ xor r14,r14
+ sub rbp,r9
+ xor r15,r15
+
+ add rax,r12
+ adc rdx,0
+ mov QWORD PTR[8+rdi],rax
+ mov QWORD PTR[16+rdi],rdx
+ mov QWORD PTR[24+rdi],r15
+
+ mov rax,QWORD PTR[((-16))+rbp*1+rsi]
+ lea rdi,QWORD PTR[((48+8))+rsp]
+ xor r10,r10
+ mov r11,QWORD PTR[8+rdi]
+
+ lea r12,QWORD PTR[r10*2+r14]
+ shr r10,63
+ lea r13,QWORD PTR[r11*2+rcx]
+ shr r11,63
+ or r13,r10
+ mov r10,QWORD PTR[16+rdi]
+ mov r14,r11
+ mul rax
+ neg r15
+ mov r11,QWORD PTR[24+rdi]
+ adc r12,rax
+ mov rax,QWORD PTR[((-8))+rbp*1+rsi]
+ mov QWORD PTR[rdi],r12
+ adc r13,rdx
+
+ lea rbx,QWORD PTR[r10*2+r14]
+ mov QWORD PTR[8+rdi],r13
+ sbb r15,r15
+ shr r10,63
+ lea r8,QWORD PTR[r11*2+rcx]
+ shr r11,63
+ or r8,r10
+ mov r10,QWORD PTR[32+rdi]
+ mov r14,r11
+ mul rax
+ neg r15
+ mov r11,QWORD PTR[40+rdi]
+ adc rbx,rax
+ mov rax,QWORD PTR[rbp*1+rsi]
+ mov QWORD PTR[16+rdi],rbx
+ adc r8,rdx
+ lea rbp,QWORD PTR[16+rbp]
+ mov QWORD PTR[24+rdi],r8
+ sbb r15,r15
+ lea rdi,QWORD PTR[64+rdi]
+ jmp $L$sqr4x_shift_n_add
+
+ALIGN 32
+$L$sqr4x_shift_n_add::
+ lea r12,QWORD PTR[r10*2+r14]
+ shr r10,63
+ lea r13,QWORD PTR[r11*2+rcx]
+ shr r11,63
+ or r13,r10
+ mov r10,QWORD PTR[((-16))+rdi]
+ mov r14,r11
+ mul rax
+ neg r15
+ mov r11,QWORD PTR[((-8))+rdi]
+ adc r12,rax
+ mov rax,QWORD PTR[((-8))+rbp*1+rsi]
+ mov QWORD PTR[((-32))+rdi],r12
+ adc r13,rdx
+
+ lea rbx,QWORD PTR[r10*2+r14]
+ mov QWORD PTR[((-24))+rdi],r13
+ sbb r15,r15
+ shr r10,63
+ lea r8,QWORD PTR[r11*2+rcx]
+ shr r11,63
+ or r8,r10
+ mov r10,QWORD PTR[rdi]
+ mov r14,r11
+ mul rax
+ neg r15
+ mov r11,QWORD PTR[8+rdi]
+ adc rbx,rax
+ mov rax,QWORD PTR[rbp*1+rsi]
+ mov QWORD PTR[((-16))+rdi],rbx
+ adc r8,rdx
+
+ lea r12,QWORD PTR[r10*2+r14]
+ mov QWORD PTR[((-8))+rdi],r8
+ sbb r15,r15
+ shr r10,63
+ lea r13,QWORD PTR[r11*2+rcx]
+ shr r11,63
+ or r13,r10
+ mov r10,QWORD PTR[16+rdi]
+ mov r14,r11
+ mul rax
+ neg r15
+ mov r11,QWORD PTR[24+rdi]
+ adc r12,rax
+ mov rax,QWORD PTR[8+rbp*1+rsi]
+ mov QWORD PTR[rdi],r12
+ adc r13,rdx
+
+ lea rbx,QWORD PTR[r10*2+r14]
+ mov QWORD PTR[8+rdi],r13
+ sbb r15,r15
+ shr r10,63
+ lea r8,QWORD PTR[r11*2+rcx]
+ shr r11,63
+ or r8,r10
+ mov r10,QWORD PTR[32+rdi]
+ mov r14,r11
+ mul rax
+ neg r15
+ mov r11,QWORD PTR[40+rdi]
+ adc rbx,rax
+ mov rax,QWORD PTR[16+rbp*1+rsi]
+ mov QWORD PTR[16+rdi],rbx
+ adc r8,rdx
+ mov QWORD PTR[24+rdi],r8
+ sbb r15,r15
+ lea rdi,QWORD PTR[64+rdi]
+ add rbp,32
+ jnz $L$sqr4x_shift_n_add
+
+ lea r12,QWORD PTR[r10*2+r14]
+DB 067h
+ shr r10,63
+ lea r13,QWORD PTR[r11*2+rcx]
+ shr r11,63
+ or r13,r10
+ mov r10,QWORD PTR[((-16))+rdi]
+ mov r14,r11
+ mul rax
+ neg r15
+ mov r11,QWORD PTR[((-8))+rdi]
+ adc r12,rax
+ mov rax,QWORD PTR[((-8))+rsi]
+ mov QWORD PTR[((-32))+rdi],r12
+ adc r13,rdx
+
+ lea rbx,QWORD PTR[r10*2+r14]
+ mov QWORD PTR[((-24))+rdi],r13
+ sbb r15,r15
+ shr r10,63
+ lea r8,QWORD PTR[r11*2+rcx]
+ shr r11,63
+ or r8,r10
+ mul rax
+ neg r15
+ adc rbx,rax
+ adc r8,rdx
+ mov QWORD PTR[((-16))+rdi],rbx
+ mov QWORD PTR[((-8))+rdi],r8
+DB 102,72,15,126,213
+sqr8x_reduction::
+ xor rax,rax
+ lea rcx,QWORD PTR[r9*2+rbp]
+ lea rdx,QWORD PTR[((48+8))+r9*2+rsp]
+ mov QWORD PTR[((0+8))+rsp],rcx
+ lea rdi,QWORD PTR[((48+8))+r9*1+rsp]
+ mov QWORD PTR[((8+8))+rsp],rdx
+ neg r9
+ jmp $L$8x_reduction_loop
+
+ALIGN 32
+$L$8x_reduction_loop::
+ lea rdi,QWORD PTR[r9*1+rdi]
+DB 066h
+ mov rbx,QWORD PTR[rdi]
+ mov r9,QWORD PTR[8+rdi]
+ mov r10,QWORD PTR[16+rdi]
+ mov r11,QWORD PTR[24+rdi]
+ mov r12,QWORD PTR[32+rdi]
+ mov r13,QWORD PTR[40+rdi]
+ mov r14,QWORD PTR[48+rdi]
+ mov r15,QWORD PTR[56+rdi]
+ mov QWORD PTR[rdx],rax
+ lea rdi,QWORD PTR[64+rdi]
+
+DB 067h
+ mov r8,rbx
+ imul rbx,QWORD PTR[((32+8))+rsp]
+ mov rax,QWORD PTR[rbp]
+ mov ecx,8
+ jmp $L$8x_reduce
+
+ALIGN 32
+$L$8x_reduce::
+ mul rbx
+ mov rax,QWORD PTR[16+rbp]
+ neg r8
+ mov r8,rdx
+ adc r8,0
+
+ mul rbx
+ add r9,rax
+ mov rax,QWORD PTR[32+rbp]
+ adc rdx,0
+ add r8,r9
+ mov QWORD PTR[((48-8+8))+rcx*8+rsp],rbx
+ mov r9,rdx
+ adc r9,0
+
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[48+rbp]
+ adc rdx,0
+ add r9,r10
+ mov rsi,QWORD PTR[((32+8))+rsp]
+ mov r10,rdx
+ adc r10,0
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[64+rbp]
+ adc rdx,0
+ imul rsi,r8
+ add r10,r11
+ mov r11,rdx
+ adc r11,0
+
+ mul rbx
+ add r12,rax
+ mov rax,QWORD PTR[80+rbp]
+ adc rdx,0
+ add r11,r12
+ mov r12,rdx
+ adc r12,0
+
+ mul rbx
+ add r13,rax
+ mov rax,QWORD PTR[96+rbp]
+ adc rdx,0
+ add r12,r13
+ mov r13,rdx
+ adc r13,0
+
+ mul rbx
+ add r14,rax
+ mov rax,QWORD PTR[112+rbp]
+ adc rdx,0
+ add r13,r14
+ mov r14,rdx
+ adc r14,0
+
+ mul rbx
+ mov rbx,rsi
+ add r15,rax
+ mov rax,QWORD PTR[rbp]
+ adc rdx,0
+ add r14,r15
+ mov r15,rdx
+ adc r15,0
+
+ dec ecx
+ jnz $L$8x_reduce
+
+ lea rbp,QWORD PTR[128+rbp]
+ xor rax,rax
+ mov rdx,QWORD PTR[((8+8))+rsp]
+ cmp rbp,QWORD PTR[((0+8))+rsp]
+ jae $L$8x_no_tail
+
+DB 066h
+ add r8,QWORD PTR[rdi]
+ adc r9,QWORD PTR[8+rdi]
+ adc r10,QWORD PTR[16+rdi]
+ adc r11,QWORD PTR[24+rdi]
+ adc r12,QWORD PTR[32+rdi]
+ adc r13,QWORD PTR[40+rdi]
+ adc r14,QWORD PTR[48+rdi]
+ adc r15,QWORD PTR[56+rdi]
+ sbb rsi,rsi
+
+ mov rbx,QWORD PTR[((48+56+8))+rsp]
+ mov ecx,8
+ mov rax,QWORD PTR[rbp]
+ jmp $L$8x_tail
+
+ALIGN 32
+$L$8x_tail::
+ mul rbx
+ add r8,rax
+ mov rax,QWORD PTR[16+rbp]
+ mov QWORD PTR[rdi],r8
+ mov r8,rdx
+ adc r8,0
+
+ mul rbx
+ add r9,rax
+ mov rax,QWORD PTR[32+rbp]
+ adc rdx,0
+ add r8,r9
+ lea rdi,QWORD PTR[8+rdi]
+ mov r9,rdx
+ adc r9,0
+
+ mul rbx
+ add r10,rax
+ mov rax,QWORD PTR[48+rbp]
+ adc rdx,0
+ add r9,r10
+ mov r10,rdx
+ adc r10,0
+
+ mul rbx
+ add r11,rax
+ mov rax,QWORD PTR[64+rbp]
+ adc rdx,0
+ add r10,r11
+ mov r11,rdx
+ adc r11,0
+
+ mul rbx
+ add r12,rax
+ mov rax,QWORD PTR[80+rbp]
+ adc rdx,0
+ add r11,r12
+ mov r12,rdx
+ adc r12,0
+
+ mul rbx
+ add r13,rax
+ mov rax,QWORD PTR[96+rbp]
+ adc rdx,0
+ add r12,r13
+ mov r13,rdx
+ adc r13,0
+
+ mul rbx
+ add r14,rax
+ mov rax,QWORD PTR[112+rbp]
+ adc rdx,0
+ add r13,r14
+ mov r14,rdx
+ adc r14,0
+
+ mul rbx
+ mov rbx,QWORD PTR[((48-16+8))+rcx*8+rsp]
+ add r15,rax
+ adc rdx,0
+ add r14,r15
+ mov rax,QWORD PTR[rbp]
+ mov r15,rdx
+ adc r15,0
+
+ dec ecx
+ jnz $L$8x_tail
+
+ lea rbp,QWORD PTR[128+rbp]
+ mov rdx,QWORD PTR[((8+8))+rsp]
+ cmp rbp,QWORD PTR[((0+8))+rsp]
+ jae $L$8x_tail_done
+
+ mov rbx,QWORD PTR[((48+56+8))+rsp]
+ neg rsi
+ mov rax,QWORD PTR[rbp]
+ adc r8,QWORD PTR[rdi]
+ adc r9,QWORD PTR[8+rdi]
+ adc r10,QWORD PTR[16+rdi]
+ adc r11,QWORD PTR[24+rdi]
+ adc r12,QWORD PTR[32+rdi]
+ adc r13,QWORD PTR[40+rdi]
+ adc r14,QWORD PTR[48+rdi]
+ adc r15,QWORD PTR[56+rdi]
+ sbb rsi,rsi
+
+ mov ecx,8
+ jmp $L$8x_tail
+
+ALIGN 32
+$L$8x_tail_done::
+ add r8,QWORD PTR[rdx]
+ xor rax,rax
+
+ neg rsi
+$L$8x_no_tail::
+ adc r8,QWORD PTR[rdi]
+ adc r9,QWORD PTR[8+rdi]
+ adc r10,QWORD PTR[16+rdi]
+ adc r11,QWORD PTR[24+rdi]
+ adc r12,QWORD PTR[32+rdi]
+ adc r13,QWORD PTR[40+rdi]
+ adc r14,QWORD PTR[48+rdi]
+ adc r15,QWORD PTR[56+rdi]
+ adc rax,0
+ mov rcx,QWORD PTR[((-16))+rbp]
+ xor rsi,rsi
+
+DB 102,72,15,126,213
+
+ mov QWORD PTR[rdi],r8
+ mov QWORD PTR[8+rdi],r9
+DB 102,73,15,126,217
+ mov QWORD PTR[16+rdi],r10
+ mov QWORD PTR[24+rdi],r11
+ mov QWORD PTR[32+rdi],r12
+ mov QWORD PTR[40+rdi],r13
+ mov QWORD PTR[48+rdi],r14
+ mov QWORD PTR[56+rdi],r15
+ lea rdi,QWORD PTR[64+rdi]
+
+ cmp rdi,rdx
+ jb $L$8x_reduction_loop
+
+ sub rcx,r15
+ lea rbx,QWORD PTR[r9*1+rdi]
+ adc rsi,rsi
+ mov rcx,r9
+ or rax,rsi
+DB 102,72,15,126,207
+ xor rax,1
+DB 102,72,15,126,206
+ lea rbp,QWORD PTR[rax*8+rbp]
+ sar rcx,3+2
+ jmp $L$sqr4x_sub
+
+ALIGN 32
+$L$sqr4x_sub::
+DB 066h
+ mov r12,QWORD PTR[rbx]
+ mov r13,QWORD PTR[8+rbx]
+ sbb r12,QWORD PTR[rbp]
+ mov r14,QWORD PTR[16+rbx]
+ sbb r13,QWORD PTR[16+rbp]
+ mov r15,QWORD PTR[24+rbx]
+ lea rbx,QWORD PTR[32+rbx]
+ sbb r14,QWORD PTR[32+rbp]
+ mov QWORD PTR[rdi],r12
+ sbb r15,QWORD PTR[48+rbp]
+ lea rbp,QWORD PTR[64+rbp]
+ mov QWORD PTR[8+rdi],r13
+ mov QWORD PTR[16+rdi],r14
+ mov QWORD PTR[24+rdi],r15
+ lea rdi,QWORD PTR[32+rdi]
+
+ inc rcx
+ jnz $L$sqr4x_sub
+ mov r10,r9
+ neg r9
+ DB 0F3h,0C3h ;repret
+bn_sqr8x_internal ENDP
+PUBLIC bn_from_montgomery
+
+ALIGN 32
+bn_from_montgomery PROC PUBLIC
+ test DWORD PTR[48+rsp],7
+ jz bn_from_mont8x
+ xor eax,eax
+ DB 0F3h,0C3h ;repret
+bn_from_montgomery ENDP
+
+
+ALIGN 32
+bn_from_mont8x PROC PRIVATE
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_bn_from_mont8x::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+DB 067h
+ mov rax,rsp
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-40))+rsp]
+ movaps XMMWORD PTR[rsp],xmm6
+ movaps XMMWORD PTR[16+rsp],xmm7
+DB 067h
+ mov r10d,r9d
+ shl r9d,3
+ shl r10d,3+2
+ neg r9
+ mov r8,QWORD PTR[r8]
+
+
+
+
+
+
+
+ lea r11,QWORD PTR[((-64))+r9*2+rsp]
+ sub r11,rsi
+ and r11,4095
+ cmp r10,r11
+ jb $L$from_sp_alt
+ sub rsp,r11
+ lea rsp,QWORD PTR[((-64))+r9*2+rsp]
+ jmp $L$from_sp_done
+
+ALIGN 32
+$L$from_sp_alt::
+ lea r10,QWORD PTR[((4096-64))+r9*2]
+ lea rsp,QWORD PTR[((-64))+r9*2+rsp]
+ sub r11,r10
+ mov r10,0
+ cmovc r11,r10
+ sub rsp,r11
+$L$from_sp_done::
+ and rsp,-64
+ mov r10,r9
+ neg r9
+
+
+
+
+
+
+
+
+
+
+ mov QWORD PTR[32+rsp],r8
+ mov QWORD PTR[40+rsp],rax
+$L$from_body::
+ mov r11,r9
+ lea rax,QWORD PTR[48+rsp]
+ pxor xmm0,xmm0
+ jmp $L$mul_by_1
+
+ALIGN 32
+$L$mul_by_1::
+ movdqu xmm1,XMMWORD PTR[rsi]
+ movdqu xmm2,XMMWORD PTR[16+rsi]
+ movdqu xmm3,XMMWORD PTR[32+rsi]
+ movdqa XMMWORD PTR[r9*1+rax],xmm0
+ movdqu xmm4,XMMWORD PTR[48+rsi]
+ movdqa XMMWORD PTR[16+r9*1+rax],xmm0
+DB 048h,08dh,0b6h,040h,000h,000h,000h
+ movdqa XMMWORD PTR[rax],xmm1
+ movdqa XMMWORD PTR[32+r9*1+rax],xmm0
+ movdqa XMMWORD PTR[16+rax],xmm2
+ movdqa XMMWORD PTR[48+r9*1+rax],xmm0
+ movdqa XMMWORD PTR[32+rax],xmm3
+ movdqa XMMWORD PTR[48+rax],xmm4
+ lea rax,QWORD PTR[64+rax]
+ sub r11,64
+ jnz $L$mul_by_1
+
+DB 102,72,15,110,207
+DB 102,72,15,110,209
+DB 067h
+ mov rbp,rcx
+DB 102,73,15,110,218
+ call sqr8x_reduction
+
+ pxor xmm0,xmm0
+ lea rax,QWORD PTR[48+rsp]
+ mov rsi,QWORD PTR[40+rsp]
+ jmp $L$from_mont_zero
+
+ALIGN 32
+$L$from_mont_zero::
+ movdqa XMMWORD PTR[rax],xmm0
+ movdqa XMMWORD PTR[16+rax],xmm0
+ movdqa XMMWORD PTR[32+rax],xmm0
+ movdqa XMMWORD PTR[48+rax],xmm0
+ lea rax,QWORD PTR[64+rax]
+ sub r9,32
+ jnz $L$from_mont_zero
+
+ mov rax,1
+ mov r15,QWORD PTR[((-48))+rsi]
+ mov r14,QWORD PTR[((-40))+rsi]
+ mov r13,QWORD PTR[((-32))+rsi]
+ mov r12,QWORD PTR[((-24))+rsi]
+ mov rbp,QWORD PTR[((-16))+rsi]
+ mov rbx,QWORD PTR[((-8))+rsi]
+ lea rsp,QWORD PTR[rsi]
+$L$from_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_bn_from_mont8x::
+bn_from_mont8x ENDP
+PUBLIC bn_scatter5
+
+ALIGN 16
+bn_scatter5 PROC PUBLIC
+ cmp edx,0
+ jz $L$scatter_epilogue
+ lea r8,QWORD PTR[r9*8+r8]
+$L$scatter::
+ mov rax,QWORD PTR[rcx]
+ lea rcx,QWORD PTR[8+rcx]
+ mov QWORD PTR[r8],rax
+ lea r8,QWORD PTR[256+r8]
+ sub edx,1
+ jnz $L$scatter
+$L$scatter_epilogue::
+ DB 0F3h,0C3h ;repret
+bn_scatter5 ENDP
+
+PUBLIC bn_gather5
+
+ALIGN 16
+bn_gather5 PROC PUBLIC
+$L$SEH_begin_bn_gather5::
+
+DB 048h,083h,0ech,028h
+DB 00fh,029h,034h,024h
+DB 00fh,029h,07ch,024h,010h
+ mov r11d,r9d
+ shr r9d,3
+ and r11,7
+ not r9d
+ lea rax,QWORD PTR[$L$magic_masks]
+ and r9d,3
+ lea r8,QWORD PTR[128+r11*8+r8]
+ movq xmm4,QWORD PTR[r9*8+rax]
+ movq xmm5,QWORD PTR[8+r9*8+rax]
+ movq xmm6,QWORD PTR[16+r9*8+rax]
+ movq xmm7,QWORD PTR[24+r9*8+rax]
+ jmp $L$gather
+ALIGN 16
+$L$gather::
+ movq xmm0,QWORD PTR[(((-128)))+r8]
+ movq xmm1,QWORD PTR[((-64))+r8]
+ pand xmm0,xmm4
+ movq xmm2,QWORD PTR[r8]
+ pand xmm1,xmm5
+ movq xmm3,QWORD PTR[64+r8]
+ pand xmm2,xmm6
+ por xmm0,xmm1
+ pand xmm3,xmm7
+DB 067h,067h
+ por xmm0,xmm2
+ lea r8,QWORD PTR[256+r8]
+ por xmm0,xmm3
+
+ movq QWORD PTR[rcx],xmm0
+ lea rcx,QWORD PTR[8+rcx]
+ sub edx,1
+ jnz $L$gather
+ movaps xmm6,XMMWORD PTR[rsp]
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ lea rsp,QWORD PTR[40+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_bn_gather5::
+bn_gather5 ENDP
+ALIGN 64
+$L$magic_masks::
+ DD 0,0,0,0,0,0,-1,-1
+ DD 0,0,0,0,0,0,0,0
+DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
+DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
+DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
+DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
+DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
+DB 112,101,110,115,115,108,46,111,114,103,62,0
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+mul_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_seh_tail
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+
+ lea r10,QWORD PTR[$L$mul_epilogue]
+ cmp rbx,r10
+ jb $L$body_40
+
+ mov r10,QWORD PTR[192+r8]
+ mov rax,QWORD PTR[8+r10*8+rax]
+ jmp $L$body_proceed
+
+$L$body_40::
+ mov rax,QWORD PTR[40+rax]
+$L$body_proceed::
+
+ movaps xmm0,XMMWORD PTR[((-88))+rax]
+ movaps xmm1,XMMWORD PTR[((-72))+rax]
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+ movups XMMWORD PTR[512+r8],xmm0
+ movups XMMWORD PTR[528+r8],xmm1
+
+$L$common_seh_tail::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+mul_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_bn_mul_mont_gather5
+ DD imagerel $L$SEH_end_bn_mul_mont_gather5
+ DD imagerel $L$SEH_info_bn_mul_mont_gather5
+
+ DD imagerel $L$SEH_begin_bn_mul4x_mont_gather5
+ DD imagerel $L$SEH_end_bn_mul4x_mont_gather5
+ DD imagerel $L$SEH_info_bn_mul4x_mont_gather5
+
+ DD imagerel $L$SEH_begin_bn_power5
+ DD imagerel $L$SEH_end_bn_power5
+ DD imagerel $L$SEH_info_bn_power5
+
+ DD imagerel $L$SEH_begin_bn_from_mont8x
+ DD imagerel $L$SEH_end_bn_from_mont8x
+ DD imagerel $L$SEH_info_bn_from_mont8x
+ DD imagerel $L$SEH_begin_bn_gather5
+ DD imagerel $L$SEH_end_bn_gather5
+ DD imagerel $L$SEH_info_bn_gather5
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_bn_mul_mont_gather5::
+DB 9,0,0,0
+ DD imagerel mul_handler
+ DD imagerel $L$mul_body,imagerel $L$mul_epilogue
+ALIGN 8
+$L$SEH_info_bn_mul4x_mont_gather5::
+DB 9,0,0,0
+ DD imagerel mul_handler
+ DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
+ALIGN 8
+$L$SEH_info_bn_power5::
+DB 9,0,0,0
+ DD imagerel mul_handler
+ DD imagerel $L$power5_body,imagerel $L$power5_epilogue
+ALIGN 8
+$L$SEH_info_bn_from_mont8x::
+DB 9,0,0,0
+ DD imagerel mul_handler
+ DD imagerel $L$from_body,imagerel $L$from_epilogue
+ALIGN 8
+$L$SEH_info_bn_gather5::
+DB 001h,00dh,005h,000h
+DB 00dh,078h,001h,000h
+DB 008h,068h,000h,000h
+DB 004h,042h,000h,000h
+ALIGN 8
+
+.xdata ENDS
+END
diff --git a/win-x86_64/crypto/cpu-x86_64-asm.asm b/win-x86_64/crypto/cpu-x86_64-asm.asm
new file mode 100644
index 0000000..dca66f5
--- /dev/null
+++ b/win-x86_64/crypto/cpu-x86_64-asm.asm
@@ -0,0 +1,158 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+
+PUBLIC OPENSSL_ia32_cpuid
+
+ALIGN 16
+OPENSSL_ia32_cpuid PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_OPENSSL_ia32_cpuid::
+ mov rdi,rcx
+
+
+
+
+ mov rdi,rcx
+ mov r8,rbx
+
+ xor eax,eax
+ mov DWORD PTR[8+rdi],eax
+ cpuid
+ mov r11d,eax
+
+ xor eax,eax
+ cmp ebx,0756e6547h
+ setne al
+ mov r9d,eax
+ cmp edx,049656e69h
+ setne al
+ or r9d,eax
+ cmp ecx,06c65746eh
+ setne al
+ or r9d,eax
+ jz $L$intel
+
+ cmp ebx,068747541h
+ setne al
+ mov r10d,eax
+ cmp edx,069746E65h
+ setne al
+ or r10d,eax
+ cmp ecx,0444D4163h
+ setne al
+ or r10d,eax
+ jnz $L$intel
+
+
+
+
+ mov eax,080000000h
+ cpuid
+
+
+ cmp eax,080000001h
+ jb $L$intel
+ mov r10d,eax
+ mov eax,080000001h
+ cpuid
+
+
+ or r9d,ecx
+ and r9d,000000801h
+
+ cmp r10d,080000008h
+ jb $L$intel
+
+ mov eax,080000008h
+ cpuid
+
+ movzx r10,cl
+ inc r10
+
+ mov eax,1
+ cpuid
+
+ bt edx,28
+ jnc $L$generic
+ shr ebx,16
+ cmp bl,r10b
+ ja $L$generic
+ and edx,0efffffffh
+ jmp $L$generic
+
+$L$intel::
+ cmp r11d,4
+ mov r10d,-1
+ jb $L$nocacheinfo
+
+ mov eax,4
+ mov ecx,0
+ cpuid
+ mov r10d,eax
+ shr r10d,14
+ and r10d,0fffh
+
+ cmp r11d,7
+ jb $L$nocacheinfo
+
+ mov eax,7
+ xor ecx,ecx
+ cpuid
+ mov DWORD PTR[8+rdi],ebx
+
+$L$nocacheinfo::
+ mov eax,1
+ cpuid
+
+ and edx,0bfefffffh
+ cmp r9d,0
+ jne $L$notintel
+ or edx,040000000h
+ and ah,15
+ cmp ah,15
+ jne $L$notintel
+ or edx,000100000h
+$L$notintel::
+ bt edx,28
+ jnc $L$generic
+ and edx,0efffffffh
+ cmp r10d,0
+ je $L$generic
+
+ or edx,010000000h
+ shr ebx,16
+ cmp bl,1
+ ja $L$generic
+ and edx,0efffffffh
+$L$generic::
+ and r9d,000000800h
+ and ecx,0fffff7ffh
+ or r9d,ecx
+
+ mov r10d,edx
+ bt r9d,27
+ jnc $L$clear_avx
+ xor ecx,ecx
+DB 00fh,001h,0d0h
+ and eax,6
+ cmp eax,6
+ je $L$done
+$L$clear_avx::
+ mov eax,0efffe7ffh
+ and r9d,eax
+ and DWORD PTR[8+rdi],0ffffffdfh
+$L$done::
+ mov DWORD PTR[4+rdi],r9d
+ mov DWORD PTR[rdi],r10d
+ mov rbx,r8
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_OPENSSL_ia32_cpuid::
+OPENSSL_ia32_cpuid ENDP
+
+
+.text$ ENDS
+END
diff --git a/win-x86_64/crypto/md5/md5-x86_64.asm b/win-x86_64/crypto/md5/md5-x86_64.asm
new file mode 100644
index 0000000..d2faa88
--- /dev/null
+++ b/win-x86_64/crypto/md5/md5-x86_64.asm
@@ -0,0 +1,778 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+ALIGN 16
+
+PUBLIC md5_block_asm_data_order
+
+md5_block_asm_data_order PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_md5_block_asm_data_order::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ push rbp
+ push rbx
+ push r12
+ push r14
+ push r15
+$L$prologue::
+
+
+
+
+ mov rbp,rdi
+ shl rdx,6
+ lea rdi,QWORD PTR[rdx*1+rsi]
+ mov eax,DWORD PTR[rbp]
+ mov ebx,DWORD PTR[4+rbp]
+ mov ecx,DWORD PTR[8+rbp]
+ mov edx,DWORD PTR[12+rbp]
+
+
+
+
+
+
+
+ cmp rsi,rdi
+ je $L$end
+
+
+$L$loop::
+ mov r8d,eax
+ mov r9d,ebx
+ mov r14d,ecx
+ mov r15d,edx
+ mov r10d,DWORD PTR[rsi]
+ mov r11d,edx
+ xor r11d,ecx
+ lea eax,DWORD PTR[((-680876936))+r10*1+rax]
+ and r11d,ebx
+ xor r11d,edx
+ mov r10d,DWORD PTR[4+rsi]
+ add eax,r11d
+ rol eax,7
+ mov r11d,ecx
+ add eax,ebx
+ xor r11d,ebx
+ lea edx,DWORD PTR[((-389564586))+r10*1+rdx]
+ and r11d,eax
+ xor r11d,ecx
+ mov r10d,DWORD PTR[8+rsi]
+ add edx,r11d
+ rol edx,12
+ mov r11d,ebx
+ add edx,eax
+ xor r11d,eax
+ lea ecx,DWORD PTR[606105819+r10*1+rcx]
+ and r11d,edx
+ xor r11d,ebx
+ mov r10d,DWORD PTR[12+rsi]
+ add ecx,r11d
+ rol ecx,17
+ mov r11d,eax
+ add ecx,edx
+ xor r11d,edx
+ lea ebx,DWORD PTR[((-1044525330))+r10*1+rbx]
+ and r11d,ecx
+ xor r11d,eax
+ mov r10d,DWORD PTR[16+rsi]
+ add ebx,r11d
+ rol ebx,22
+ mov r11d,edx
+ add ebx,ecx
+ xor r11d,ecx
+ lea eax,DWORD PTR[((-176418897))+r10*1+rax]
+ and r11d,ebx
+ xor r11d,edx
+ mov r10d,DWORD PTR[20+rsi]
+ add eax,r11d
+ rol eax,7
+ mov r11d,ecx
+ add eax,ebx
+ xor r11d,ebx
+ lea edx,DWORD PTR[1200080426+r10*1+rdx]
+ and r11d,eax
+ xor r11d,ecx
+ mov r10d,DWORD PTR[24+rsi]
+ add edx,r11d
+ rol edx,12
+ mov r11d,ebx
+ add edx,eax
+ xor r11d,eax
+ lea ecx,DWORD PTR[((-1473231341))+r10*1+rcx]
+ and r11d,edx
+ xor r11d,ebx
+ mov r10d,DWORD PTR[28+rsi]
+ add ecx,r11d
+ rol ecx,17
+ mov r11d,eax
+ add ecx,edx
+ xor r11d,edx
+ lea ebx,DWORD PTR[((-45705983))+r10*1+rbx]
+ and r11d,ecx
+ xor r11d,eax
+ mov r10d,DWORD PTR[32+rsi]
+ add ebx,r11d
+ rol ebx,22
+ mov r11d,edx
+ add ebx,ecx
+ xor r11d,ecx
+ lea eax,DWORD PTR[1770035416+r10*1+rax]
+ and r11d,ebx
+ xor r11d,edx
+ mov r10d,DWORD PTR[36+rsi]
+ add eax,r11d
+ rol eax,7
+ mov r11d,ecx
+ add eax,ebx
+ xor r11d,ebx
+ lea edx,DWORD PTR[((-1958414417))+r10*1+rdx]
+ and r11d,eax
+ xor r11d,ecx
+ mov r10d,DWORD PTR[40+rsi]
+ add edx,r11d
+ rol edx,12
+ mov r11d,ebx
+ add edx,eax
+ xor r11d,eax
+ lea ecx,DWORD PTR[((-42063))+r10*1+rcx]
+ and r11d,edx
+ xor r11d,ebx
+ mov r10d,DWORD PTR[44+rsi]
+ add ecx,r11d
+ rol ecx,17
+ mov r11d,eax
+ add ecx,edx
+ xor r11d,edx
+ lea ebx,DWORD PTR[((-1990404162))+r10*1+rbx]
+ and r11d,ecx
+ xor r11d,eax
+ mov r10d,DWORD PTR[48+rsi]
+ add ebx,r11d
+ rol ebx,22
+ mov r11d,edx
+ add ebx,ecx
+ xor r11d,ecx
+ lea eax,DWORD PTR[1804603682+r10*1+rax]
+ and r11d,ebx
+ xor r11d,edx
+ mov r10d,DWORD PTR[52+rsi]
+ add eax,r11d
+ rol eax,7
+ mov r11d,ecx
+ add eax,ebx
+ xor r11d,ebx
+ lea edx,DWORD PTR[((-40341101))+r10*1+rdx]
+ and r11d,eax
+ xor r11d,ecx
+ mov r10d,DWORD PTR[56+rsi]
+ add edx,r11d
+ rol edx,12
+ mov r11d,ebx
+ add edx,eax
+ xor r11d,eax
+ lea ecx,DWORD PTR[((-1502002290))+r10*1+rcx]
+ and r11d,edx
+ xor r11d,ebx
+ mov r10d,DWORD PTR[60+rsi]
+ add ecx,r11d
+ rol ecx,17
+ mov r11d,eax
+ add ecx,edx
+ xor r11d,edx
+ lea ebx,DWORD PTR[1236535329+r10*1+rbx]
+ and r11d,ecx
+ xor r11d,eax
+ mov r10d,DWORD PTR[rsi]
+ add ebx,r11d
+ rol ebx,22
+ mov r11d,edx
+ add ebx,ecx
+ mov r10d,DWORD PTR[4+rsi]
+ mov r11d,edx
+ mov r12d,edx
+ not r11d
+ lea eax,DWORD PTR[((-165796510))+r10*1+rax]
+ and r12d,ebx
+ and r11d,ecx
+ mov r10d,DWORD PTR[24+rsi]
+ or r12d,r11d
+ mov r11d,ecx
+ add eax,r12d
+ mov r12d,ecx
+ rol eax,5
+ add eax,ebx
+ not r11d
+ lea edx,DWORD PTR[((-1069501632))+r10*1+rdx]
+ and r12d,eax
+ and r11d,ebx
+ mov r10d,DWORD PTR[44+rsi]
+ or r12d,r11d
+ mov r11d,ebx
+ add edx,r12d
+ mov r12d,ebx
+ rol edx,9
+ add edx,eax
+ not r11d
+ lea ecx,DWORD PTR[643717713+r10*1+rcx]
+ and r12d,edx
+ and r11d,eax
+ mov r10d,DWORD PTR[rsi]
+ or r12d,r11d
+ mov r11d,eax
+ add ecx,r12d
+ mov r12d,eax
+ rol ecx,14
+ add ecx,edx
+ not r11d
+ lea ebx,DWORD PTR[((-373897302))+r10*1+rbx]
+ and r12d,ecx
+ and r11d,edx
+ mov r10d,DWORD PTR[20+rsi]
+ or r12d,r11d
+ mov r11d,edx
+ add ebx,r12d
+ mov r12d,edx
+ rol ebx,20
+ add ebx,ecx
+ not r11d
+ lea eax,DWORD PTR[((-701558691))+r10*1+rax]
+ and r12d,ebx
+ and r11d,ecx
+ mov r10d,DWORD PTR[40+rsi]
+ or r12d,r11d
+ mov r11d,ecx
+ add eax,r12d
+ mov r12d,ecx
+ rol eax,5
+ add eax,ebx
+ not r11d
+ lea edx,DWORD PTR[38016083+r10*1+rdx]
+ and r12d,eax
+ and r11d,ebx
+ mov r10d,DWORD PTR[60+rsi]
+ or r12d,r11d
+ mov r11d,ebx
+ add edx,r12d
+ mov r12d,ebx
+ rol edx,9
+ add edx,eax
+ not r11d
+ lea ecx,DWORD PTR[((-660478335))+r10*1+rcx]
+ and r12d,edx
+ and r11d,eax
+ mov r10d,DWORD PTR[16+rsi]
+ or r12d,r11d
+ mov r11d,eax
+ add ecx,r12d
+ mov r12d,eax
+ rol ecx,14
+ add ecx,edx
+ not r11d
+ lea ebx,DWORD PTR[((-405537848))+r10*1+rbx]
+ and r12d,ecx
+ and r11d,edx
+ mov r10d,DWORD PTR[36+rsi]
+ or r12d,r11d
+ mov r11d,edx
+ add ebx,r12d
+ mov r12d,edx
+ rol ebx,20
+ add ebx,ecx
+ not r11d
+ lea eax,DWORD PTR[568446438+r10*1+rax]
+ and r12d,ebx
+ and r11d,ecx
+ mov r10d,DWORD PTR[56+rsi]
+ or r12d,r11d
+ mov r11d,ecx
+ add eax,r12d
+ mov r12d,ecx
+ rol eax,5
+ add eax,ebx
+ not r11d
+ lea edx,DWORD PTR[((-1019803690))+r10*1+rdx]
+ and r12d,eax
+ and r11d,ebx
+ mov r10d,DWORD PTR[12+rsi]
+ or r12d,r11d
+ mov r11d,ebx
+ add edx,r12d
+ mov r12d,ebx
+ rol edx,9
+ add edx,eax
+ not r11d
+ lea ecx,DWORD PTR[((-187363961))+r10*1+rcx]
+ and r12d,edx
+ and r11d,eax
+ mov r10d,DWORD PTR[32+rsi]
+ or r12d,r11d
+ mov r11d,eax
+ add ecx,r12d
+ mov r12d,eax
+ rol ecx,14
+ add ecx,edx
+ not r11d
+ lea ebx,DWORD PTR[1163531501+r10*1+rbx]
+ and r12d,ecx
+ and r11d,edx
+ mov r10d,DWORD PTR[52+rsi]
+ or r12d,r11d
+ mov r11d,edx
+ add ebx,r12d
+ mov r12d,edx
+ rol ebx,20
+ add ebx,ecx
+ not r11d
+ lea eax,DWORD PTR[((-1444681467))+r10*1+rax]
+ and r12d,ebx
+ and r11d,ecx
+ mov r10d,DWORD PTR[8+rsi]
+ or r12d,r11d
+ mov r11d,ecx
+ add eax,r12d
+ mov r12d,ecx
+ rol eax,5
+ add eax,ebx
+ not r11d
+ lea edx,DWORD PTR[((-51403784))+r10*1+rdx]
+ and r12d,eax
+ and r11d,ebx
+ mov r10d,DWORD PTR[28+rsi]
+ or r12d,r11d
+ mov r11d,ebx
+ add edx,r12d
+ mov r12d,ebx
+ rol edx,9
+ add edx,eax
+ not r11d
+ lea ecx,DWORD PTR[1735328473+r10*1+rcx]
+ and r12d,edx
+ and r11d,eax
+ mov r10d,DWORD PTR[48+rsi]
+ or r12d,r11d
+ mov r11d,eax
+ add ecx,r12d
+ mov r12d,eax
+ rol ecx,14
+ add ecx,edx
+ not r11d
+ lea ebx,DWORD PTR[((-1926607734))+r10*1+rbx]
+ and r12d,ecx
+ and r11d,edx
+ mov r10d,DWORD PTR[rsi]
+ or r12d,r11d
+ mov r11d,edx
+ add ebx,r12d
+ mov r12d,edx
+ rol ebx,20
+ add ebx,ecx
+ mov r10d,DWORD PTR[20+rsi]
+ mov r11d,ecx
+ lea eax,DWORD PTR[((-378558))+r10*1+rax]
+ mov r10d,DWORD PTR[32+rsi]
+ xor r11d,edx
+ xor r11d,ebx
+ add eax,r11d
+ rol eax,4
+ mov r11d,ebx
+ add eax,ebx
+ lea edx,DWORD PTR[((-2022574463))+r10*1+rdx]
+ mov r10d,DWORD PTR[44+rsi]
+ xor r11d,ecx
+ xor r11d,eax
+ add edx,r11d
+ rol edx,11
+ mov r11d,eax
+ add edx,eax
+ lea ecx,DWORD PTR[1839030562+r10*1+rcx]
+ mov r10d,DWORD PTR[56+rsi]
+ xor r11d,ebx
+ xor r11d,edx
+ add ecx,r11d
+ rol ecx,16
+ mov r11d,edx
+ add ecx,edx
+ lea ebx,DWORD PTR[((-35309556))+r10*1+rbx]
+ mov r10d,DWORD PTR[4+rsi]
+ xor r11d,eax
+ xor r11d,ecx
+ add ebx,r11d
+ rol ebx,23
+ mov r11d,ecx
+ add ebx,ecx
+ lea eax,DWORD PTR[((-1530992060))+r10*1+rax]
+ mov r10d,DWORD PTR[16+rsi]
+ xor r11d,edx
+ xor r11d,ebx
+ add eax,r11d
+ rol eax,4
+ mov r11d,ebx
+ add eax,ebx
+ lea edx,DWORD PTR[1272893353+r10*1+rdx]
+ mov r10d,DWORD PTR[28+rsi]
+ xor r11d,ecx
+ xor r11d,eax
+ add edx,r11d
+ rol edx,11
+ mov r11d,eax
+ add edx,eax
+ lea ecx,DWORD PTR[((-155497632))+r10*1+rcx]
+ mov r10d,DWORD PTR[40+rsi]
+ xor r11d,ebx
+ xor r11d,edx
+ add ecx,r11d
+ rol ecx,16
+ mov r11d,edx
+ add ecx,edx
+ lea ebx,DWORD PTR[((-1094730640))+r10*1+rbx]
+ mov r10d,DWORD PTR[52+rsi]
+ xor r11d,eax
+ xor r11d,ecx
+ add ebx,r11d
+ rol ebx,23
+ mov r11d,ecx
+ add ebx,ecx
+ lea eax,DWORD PTR[681279174+r10*1+rax]
+ mov r10d,DWORD PTR[rsi]
+ xor r11d,edx
+ xor r11d,ebx
+ add eax,r11d
+ rol eax,4
+ mov r11d,ebx
+ add eax,ebx
+ lea edx,DWORD PTR[((-358537222))+r10*1+rdx]
+ mov r10d,DWORD PTR[12+rsi]
+ xor r11d,ecx
+ xor r11d,eax
+ add edx,r11d
+ rol edx,11
+ mov r11d,eax
+ add edx,eax
+ lea ecx,DWORD PTR[((-722521979))+r10*1+rcx]
+ mov r10d,DWORD PTR[24+rsi]
+ xor r11d,ebx
+ xor r11d,edx
+ add ecx,r11d
+ rol ecx,16
+ mov r11d,edx
+ add ecx,edx
+ lea ebx,DWORD PTR[76029189+r10*1+rbx]
+ mov r10d,DWORD PTR[36+rsi]
+ xor r11d,eax
+ xor r11d,ecx
+ add ebx,r11d
+ rol ebx,23
+ mov r11d,ecx
+ add ebx,ecx
+ lea eax,DWORD PTR[((-640364487))+r10*1+rax]
+ mov r10d,DWORD PTR[48+rsi]
+ xor r11d,edx
+ xor r11d,ebx
+ add eax,r11d
+ rol eax,4
+ mov r11d,ebx
+ add eax,ebx
+ lea edx,DWORD PTR[((-421815835))+r10*1+rdx]
+ mov r10d,DWORD PTR[60+rsi]
+ xor r11d,ecx
+ xor r11d,eax
+ add edx,r11d
+ rol edx,11
+ mov r11d,eax
+ add edx,eax
+ lea ecx,DWORD PTR[530742520+r10*1+rcx]
+ mov r10d,DWORD PTR[8+rsi]
+ xor r11d,ebx
+ xor r11d,edx
+ add ecx,r11d
+ rol ecx,16
+ mov r11d,edx
+ add ecx,edx
+ lea ebx,DWORD PTR[((-995338651))+r10*1+rbx]
+ mov r10d,DWORD PTR[rsi]
+ xor r11d,eax
+ xor r11d,ecx
+ add ebx,r11d
+ rol ebx,23
+ mov r11d,ecx
+ add ebx,ecx
+ mov r10d,DWORD PTR[rsi]
+ mov r11d,0ffffffffh
+ xor r11d,edx
+ lea eax,DWORD PTR[((-198630844))+r10*1+rax]
+ or r11d,ebx
+ xor r11d,ecx
+ add eax,r11d
+ mov r10d,DWORD PTR[28+rsi]
+ mov r11d,0ffffffffh
+ rol eax,6
+ xor r11d,ecx
+ add eax,ebx
+ lea edx,DWORD PTR[1126891415+r10*1+rdx]
+ or r11d,eax
+ xor r11d,ebx
+ add edx,r11d
+ mov r10d,DWORD PTR[56+rsi]
+ mov r11d,0ffffffffh
+ rol edx,10
+ xor r11d,ebx
+ add edx,eax
+ lea ecx,DWORD PTR[((-1416354905))+r10*1+rcx]
+ or r11d,edx
+ xor r11d,eax
+ add ecx,r11d
+ mov r10d,DWORD PTR[20+rsi]
+ mov r11d,0ffffffffh
+ rol ecx,15
+ xor r11d,eax
+ add ecx,edx
+ lea ebx,DWORD PTR[((-57434055))+r10*1+rbx]
+ or r11d,ecx
+ xor r11d,edx
+ add ebx,r11d
+ mov r10d,DWORD PTR[48+rsi]
+ mov r11d,0ffffffffh
+ rol ebx,21
+ xor r11d,edx
+ add ebx,ecx
+ lea eax,DWORD PTR[1700485571+r10*1+rax]
+ or r11d,ebx
+ xor r11d,ecx
+ add eax,r11d
+ mov r10d,DWORD PTR[12+rsi]
+ mov r11d,0ffffffffh
+ rol eax,6
+ xor r11d,ecx
+ add eax,ebx
+ lea edx,DWORD PTR[((-1894986606))+r10*1+rdx]
+ or r11d,eax
+ xor r11d,ebx
+ add edx,r11d
+ mov r10d,DWORD PTR[40+rsi]
+ mov r11d,0ffffffffh
+ rol edx,10
+ xor r11d,ebx
+ add edx,eax
+ lea ecx,DWORD PTR[((-1051523))+r10*1+rcx]
+ or r11d,edx
+ xor r11d,eax
+ add ecx,r11d
+ mov r10d,DWORD PTR[4+rsi]
+ mov r11d,0ffffffffh
+ rol ecx,15
+ xor r11d,eax
+ add ecx,edx
+ lea ebx,DWORD PTR[((-2054922799))+r10*1+rbx]
+ or r11d,ecx
+ xor r11d,edx
+ add ebx,r11d
+ mov r10d,DWORD PTR[32+rsi]
+ mov r11d,0ffffffffh
+ rol ebx,21
+ xor r11d,edx
+ add ebx,ecx
+ lea eax,DWORD PTR[1873313359+r10*1+rax]
+ or r11d,ebx
+ xor r11d,ecx
+ add eax,r11d
+ mov r10d,DWORD PTR[60+rsi]
+ mov r11d,0ffffffffh
+ rol eax,6
+ xor r11d,ecx
+ add eax,ebx
+ lea edx,DWORD PTR[((-30611744))+r10*1+rdx]
+ or r11d,eax
+ xor r11d,ebx
+ add edx,r11d
+ mov r10d,DWORD PTR[24+rsi]
+ mov r11d,0ffffffffh
+ rol edx,10
+ xor r11d,ebx
+ add edx,eax
+ lea ecx,DWORD PTR[((-1560198380))+r10*1+rcx]
+ or r11d,edx
+ xor r11d,eax
+ add ecx,r11d
+ mov r10d,DWORD PTR[52+rsi]
+ mov r11d,0ffffffffh
+ rol ecx,15
+ xor r11d,eax
+ add ecx,edx
+ lea ebx,DWORD PTR[1309151649+r10*1+rbx]
+ or r11d,ecx
+ xor r11d,edx
+ add ebx,r11d
+ mov r10d,DWORD PTR[16+rsi]
+ mov r11d,0ffffffffh
+ rol ebx,21
+ xor r11d,edx
+ add ebx,ecx
+ lea eax,DWORD PTR[((-145523070))+r10*1+rax]
+ or r11d,ebx
+ xor r11d,ecx
+ add eax,r11d
+ mov r10d,DWORD PTR[44+rsi]
+ mov r11d,0ffffffffh
+ rol eax,6
+ xor r11d,ecx
+ add eax,ebx
+ lea edx,DWORD PTR[((-1120210379))+r10*1+rdx]
+ or r11d,eax
+ xor r11d,ebx
+ add edx,r11d
+ mov r10d,DWORD PTR[8+rsi]
+ mov r11d,0ffffffffh
+ rol edx,10
+ xor r11d,ebx
+ add edx,eax
+ lea ecx,DWORD PTR[718787259+r10*1+rcx]
+ or r11d,edx
+ xor r11d,eax
+ add ecx,r11d
+ mov r10d,DWORD PTR[36+rsi]
+ mov r11d,0ffffffffh
+ rol ecx,15
+ xor r11d,eax
+ add ecx,edx
+ lea ebx,DWORD PTR[((-343485551))+r10*1+rbx]
+ or r11d,ecx
+ xor r11d,edx
+ add ebx,r11d
+ mov r10d,DWORD PTR[rsi]
+ mov r11d,0ffffffffh
+ rol ebx,21
+ xor r11d,edx
+ add ebx,ecx
+
+ add eax,r8d
+ add ebx,r9d
+ add ecx,r14d
+ add edx,r15d
+
+
+ add rsi,64
+ cmp rsi,rdi
+ jb $L$loop
+
+
+$L$end::
+ mov DWORD PTR[rbp],eax
+ mov DWORD PTR[4+rbp],ebx
+ mov DWORD PTR[8+rbp],ecx
+ mov DWORD PTR[12+rbp],edx
+
+ mov r15,QWORD PTR[rsp]
+ mov r14,QWORD PTR[8+rsp]
+ mov r12,QWORD PTR[16+rsp]
+ mov rbx,QWORD PTR[24+rsp]
+ mov rbp,QWORD PTR[32+rsp]
+ add rsp,40
+$L$epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_md5_block_asm_data_order::
+md5_block_asm_data_order ENDP
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ lea r10,QWORD PTR[$L$prologue]
+ cmp rbx,r10
+ jb $L$in_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ lea r10,QWORD PTR[$L$epilogue]
+ cmp rbx,r10
+ jae $L$in_prologue
+
+ lea rax,QWORD PTR[40+rax]
+
+ mov rbp,QWORD PTR[((-8))+rax]
+ mov rbx,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r14,QWORD PTR[((-32))+rax]
+ mov r15,QWORD PTR[((-40))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+$L$in_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_md5_block_asm_data_order
+ DD imagerel $L$SEH_end_md5_block_asm_data_order
+ DD imagerel $L$SEH_info_md5_block_asm_data_order
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_md5_block_asm_data_order::
+DB 9,0,0,0
+ DD imagerel se_handler
+
+.xdata ENDS
+END
diff --git a/win-x86_64/crypto/modes/aesni-gcm-x86_64.asm b/win-x86_64/crypto/modes/aesni-gcm-x86_64.asm
new file mode 100644
index 0000000..828be8d
--- /dev/null
+++ b/win-x86_64/crypto/modes/aesni-gcm-x86_64.asm
@@ -0,0 +1,19 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+
+PUBLIC aesni_gcm_encrypt
+
+aesni_gcm_encrypt PROC PUBLIC
+ xor eax,eax
+ DB 0F3h,0C3h ;repret
+aesni_gcm_encrypt ENDP
+
+PUBLIC aesni_gcm_decrypt
+
+aesni_gcm_decrypt PROC PUBLIC
+ xor eax,eax
+ DB 0F3h,0C3h ;repret
+aesni_gcm_decrypt ENDP
+
+.text$ ENDS
+END
diff --git a/win-x86_64/crypto/modes/ghash-x86_64.asm b/win-x86_64/crypto/modes/ghash-x86_64.asm
new file mode 100644
index 0000000..9993d75
--- /dev/null
+++ b/win-x86_64/crypto/modes/ghash-x86_64.asm
@@ -0,0 +1,1510 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+EXTERN OPENSSL_ia32cap_P:NEAR
+
+PUBLIC gcm_gmult_4bit
+
+ALIGN 16
+gcm_gmult_4bit PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_gcm_gmult_4bit::
+ mov rdi,rcx
+ mov rsi,rdx
+
+
+ push rbx
+ push rbp
+ push r12
+$L$gmult_prologue::
+
+ movzx r8,BYTE PTR[15+rdi]
+ lea r11,QWORD PTR[$L$rem_4bit]
+ xor rax,rax
+ xor rbx,rbx
+ mov al,r8b
+ mov bl,r8b
+ shl al,4
+ mov rcx,14
+ mov r8,QWORD PTR[8+rax*1+rsi]
+ mov r9,QWORD PTR[rax*1+rsi]
+ and bl,0f0h
+ mov rdx,r8
+ jmp $L$oop1
+
+ALIGN 16
+$L$oop1::
+ shr r8,4
+ and rdx,0fh
+ mov r10,r9
+ mov al,BYTE PTR[rcx*1+rdi]
+ shr r9,4
+ xor r8,QWORD PTR[8+rbx*1+rsi]
+ shl r10,60
+ xor r9,QWORD PTR[rbx*1+rsi]
+ mov bl,al
+ xor r9,QWORD PTR[rdx*8+r11]
+ mov rdx,r8
+ shl al,4
+ xor r8,r10
+ dec rcx
+ js $L$break1
+
+ shr r8,4
+ and rdx,0fh
+ mov r10,r9
+ shr r9,4
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ shl r10,60
+ xor r9,QWORD PTR[rax*1+rsi]
+ and bl,0f0h
+ xor r9,QWORD PTR[rdx*8+r11]
+ mov rdx,r8
+ xor r8,r10
+ jmp $L$oop1
+
+ALIGN 16
+$L$break1::
+ shr r8,4
+ and rdx,0fh
+ mov r10,r9
+ shr r9,4
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ shl r10,60
+ xor r9,QWORD PTR[rax*1+rsi]
+ and bl,0f0h
+ xor r9,QWORD PTR[rdx*8+r11]
+ mov rdx,r8
+ xor r8,r10
+
+ shr r8,4
+ and rdx,0fh
+ mov r10,r9
+ shr r9,4
+ xor r8,QWORD PTR[8+rbx*1+rsi]
+ shl r10,60
+ xor r9,QWORD PTR[rbx*1+rsi]
+ xor r8,r10
+ xor r9,QWORD PTR[rdx*8+r11]
+
+ bswap r8
+ bswap r9
+ mov QWORD PTR[8+rdi],r8
+ mov QWORD PTR[rdi],r9
+
+ mov rbx,QWORD PTR[16+rsp]
+ lea rsp,QWORD PTR[24+rsp]
+$L$gmult_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_gcm_gmult_4bit::
+gcm_gmult_4bit ENDP
+PUBLIC gcm_ghash_4bit
+
+ALIGN 16
+gcm_ghash_4bit PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_gcm_ghash_4bit::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp,280
+$L$ghash_prologue::
+ mov r14,rdx
+ mov r15,rcx
+ sub rsi,-128
+ lea rbp,QWORD PTR[((16+128))+rsp]
+ xor edx,edx
+ mov r8,QWORD PTR[((0+0-128))+rsi]
+ mov rax,QWORD PTR[((0+8-128))+rsi]
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov r9,QWORD PTR[((16+0-128))+rsi]
+ shl dl,4
+ mov rbx,QWORD PTR[((16+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[rbp],r8
+ mov r8,QWORD PTR[((32+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((0-128))+rbp],rax
+ mov rax,QWORD PTR[((32+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[1+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[8+rbp],r9
+ mov r9,QWORD PTR[((48+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((8-128))+rbp],rbx
+ mov rbx,QWORD PTR[((48+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[2+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[16+rbp],r8
+ mov r8,QWORD PTR[((64+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((16-128))+rbp],rax
+ mov rax,QWORD PTR[((64+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[3+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[24+rbp],r9
+ mov r9,QWORD PTR[((80+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((24-128))+rbp],rbx
+ mov rbx,QWORD PTR[((80+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[4+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[32+rbp],r8
+ mov r8,QWORD PTR[((96+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((32-128))+rbp],rax
+ mov rax,QWORD PTR[((96+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[5+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[40+rbp],r9
+ mov r9,QWORD PTR[((112+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((40-128))+rbp],rbx
+ mov rbx,QWORD PTR[((112+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[6+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[48+rbp],r8
+ mov r8,QWORD PTR[((128+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((48-128))+rbp],rax
+ mov rax,QWORD PTR[((128+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[7+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[56+rbp],r9
+ mov r9,QWORD PTR[((144+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((56-128))+rbp],rbx
+ mov rbx,QWORD PTR[((144+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[8+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[64+rbp],r8
+ mov r8,QWORD PTR[((160+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((64-128))+rbp],rax
+ mov rax,QWORD PTR[((160+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[9+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[72+rbp],r9
+ mov r9,QWORD PTR[((176+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((72-128))+rbp],rbx
+ mov rbx,QWORD PTR[((176+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[10+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[80+rbp],r8
+ mov r8,QWORD PTR[((192+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((80-128))+rbp],rax
+ mov rax,QWORD PTR[((192+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[11+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[88+rbp],r9
+ mov r9,QWORD PTR[((208+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((88-128))+rbp],rbx
+ mov rbx,QWORD PTR[((208+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[12+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[96+rbp],r8
+ mov r8,QWORD PTR[((224+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((96-128))+rbp],rax
+ mov rax,QWORD PTR[((224+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[13+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[104+rbp],r9
+ mov r9,QWORD PTR[((240+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((104-128))+rbp],rbx
+ mov rbx,QWORD PTR[((240+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[14+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[112+rbp],r8
+ shl dl,4
+ mov QWORD PTR[((112-128))+rbp],rax
+ shl r10,60
+ mov BYTE PTR[15+rsp],dl
+ or rbx,r10
+ mov QWORD PTR[120+rbp],r9
+ mov QWORD PTR[((120-128))+rbp],rbx
+ add rsi,-128
+ mov r8,QWORD PTR[8+rdi]
+ mov r9,QWORD PTR[rdi]
+ add r15,r14
+ lea r11,QWORD PTR[$L$rem_8bit]
+ jmp $L$outer_loop
+ALIGN 16
+$L$outer_loop::
+ xor r9,QWORD PTR[r14]
+ mov rdx,QWORD PTR[8+r14]
+ lea r14,QWORD PTR[16+r14]
+ xor rdx,r8
+ mov QWORD PTR[rdi],r9
+ mov QWORD PTR[8+rdi],rdx
+ shr rdx,32
+ xor rax,rax
+ rol edx,8
+ mov al,dl
+ movzx ebx,dl
+ shl al,4
+ shr ebx,4
+ rol edx,8
+ mov r8,QWORD PTR[8+rax*1+rsi]
+ mov r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ xor r12,r8
+ mov r10,r9
+ shr r8,8
+ movzx r12,r12b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ mov edx,DWORD PTR[8+rdi]
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ mov edx,DWORD PTR[4+rdi]
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ mov edx,DWORD PTR[rdi]
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ and ecx,240
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ mov edx,DWORD PTR[((-4))+rdi]
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ movzx r12,WORD PTR[r12*2+r11]
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ shl r12,48
+ xor r8,r10
+ xor r9,r12
+ movzx r13,r8b
+ shr r8,4
+ mov r10,r9
+ shl r13b,4
+ shr r9,4
+ xor r8,QWORD PTR[8+rcx*1+rsi]
+ movzx r13,WORD PTR[r13*2+r11]
+ shl r10,60
+ xor r9,QWORD PTR[rcx*1+rsi]
+ xor r8,r10
+ shl r13,48
+ bswap r8
+ xor r9,r13
+ bswap r9
+ cmp r14,r15
+ jb $L$outer_loop
+ mov QWORD PTR[8+rdi],r8
+ mov QWORD PTR[rdi],r9
+
+ lea rsi,QWORD PTR[280+rsp]
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$ghash_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_gcm_ghash_4bit::
+gcm_ghash_4bit ENDP
+PUBLIC gcm_init_clmul
+
+ALIGN 16
+gcm_init_clmul PROC PUBLIC
+$L$_init_clmul::
+$L$SEH_begin_gcm_init_clmul::
+
+DB 048h,083h,0ech,018h
+DB 00fh,029h,034h,024h
+ movdqu xmm2,XMMWORD PTR[rdx]
+ pshufd xmm2,xmm2,78
+
+
+ pshufd xmm4,xmm2,255
+ movdqa xmm3,xmm2
+ psllq xmm2,1
+ pxor xmm5,xmm5
+ psrlq xmm3,63
+ pcmpgtd xmm5,xmm4
+ pslldq xmm3,8
+ por xmm2,xmm3
+
+
+ pand xmm5,XMMWORD PTR[$L$0x1c2_polynomial]
+ pxor xmm2,xmm5
+
+
+ pshufd xmm6,xmm2,78
+ movdqa xmm0,xmm2
+ pxor xmm6,xmm2
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pxor xmm3,xmm0
+DB 102,15,58,68,194,0
+DB 102,15,58,68,202,17
+DB 102,15,58,68,222,0
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3
+ pxor xmm0,xmm4
+
+ movdqa xmm4,xmm0
+ movdqa xmm3,xmm0
+ psllq xmm0,5
+ pxor xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm3,xmm0
+ pslldq xmm0,8
+ psrldq xmm3,8
+ pxor xmm0,xmm4
+ pxor xmm1,xmm3
+
+
+ movdqa xmm4,xmm0
+ psrlq xmm0,1
+ pxor xmm1,xmm4
+ pxor xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm1
+ pshufd xmm3,xmm2,78
+ pshufd xmm4,xmm0,78
+ pxor xmm3,xmm2
+ movdqu XMMWORD PTR[rcx],xmm2
+ pxor xmm4,xmm0
+ movdqu XMMWORD PTR[16+rcx],xmm0
+DB 102,15,58,15,227,8
+ movdqu XMMWORD PTR[32+rcx],xmm4
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pxor xmm3,xmm0
+DB 102,15,58,68,194,0
+DB 102,15,58,68,202,17
+DB 102,15,58,68,222,0
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3
+ pxor xmm0,xmm4
+
+ movdqa xmm4,xmm0
+ movdqa xmm3,xmm0
+ psllq xmm0,5
+ pxor xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm3,xmm0
+ pslldq xmm0,8
+ psrldq xmm3,8
+ pxor xmm0,xmm4
+ pxor xmm1,xmm3
+
+
+ movdqa xmm4,xmm0
+ psrlq xmm0,1
+ pxor xmm1,xmm4
+ pxor xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm1
+ movdqa xmm5,xmm0
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pxor xmm3,xmm0
+DB 102,15,58,68,194,0
+DB 102,15,58,68,202,17
+DB 102,15,58,68,222,0
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3
+ pxor xmm0,xmm4
+
+ movdqa xmm4,xmm0
+ movdqa xmm3,xmm0
+ psllq xmm0,5
+ pxor xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm3,xmm0
+ pslldq xmm0,8
+ psrldq xmm3,8
+ pxor xmm0,xmm4
+ pxor xmm1,xmm3
+
+
+ movdqa xmm4,xmm0
+ psrlq xmm0,1
+ pxor xmm1,xmm4
+ pxor xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm1
+ pshufd xmm3,xmm5,78
+ pshufd xmm4,xmm0,78
+ pxor xmm3,xmm5
+ movdqu XMMWORD PTR[48+rcx],xmm5
+ pxor xmm4,xmm0
+ movdqu XMMWORD PTR[64+rcx],xmm0
+DB 102,15,58,15,227,8
+ movdqu XMMWORD PTR[80+rcx],xmm4
+ movaps xmm6,XMMWORD PTR[rsp]
+ lea rsp,QWORD PTR[24+rsp]
+$L$SEH_end_gcm_init_clmul::
+ DB 0F3h,0C3h ;repret
+gcm_init_clmul ENDP
+PUBLIC gcm_gmult_clmul
+
+ALIGN 16
+gcm_gmult_clmul PROC PUBLIC
+$L$_gmult_clmul::
+ movdqu xmm0,XMMWORD PTR[rcx]
+ movdqa xmm5,XMMWORD PTR[$L$bswap_mask]
+ movdqu xmm2,XMMWORD PTR[rdx]
+ movdqu xmm4,XMMWORD PTR[32+rdx]
+DB 102,15,56,0,197
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pxor xmm3,xmm0
+DB 102,15,58,68,194,0
+DB 102,15,58,68,202,17
+DB 102,15,58,68,220,0
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3
+ pxor xmm0,xmm4
+
+ movdqa xmm4,xmm0
+ movdqa xmm3,xmm0
+ psllq xmm0,5
+ pxor xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm3,xmm0
+ pslldq xmm0,8
+ psrldq xmm3,8
+ pxor xmm0,xmm4
+ pxor xmm1,xmm3
+
+
+ movdqa xmm4,xmm0
+ psrlq xmm0,1
+ pxor xmm1,xmm4
+ pxor xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm1
+DB 102,15,56,0,197
+ movdqu XMMWORD PTR[rcx],xmm0
+ DB 0F3h,0C3h ;repret
+gcm_gmult_clmul ENDP
+PUBLIC gcm_ghash_clmul
+
+ALIGN 32
+gcm_ghash_clmul PROC PUBLIC
+$L$_ghash_clmul::
+ lea rax,QWORD PTR[((-136))+rsp]
+$L$SEH_begin_gcm_ghash_clmul::
+
+DB 048h,08dh,060h,0e0h
+DB 00fh,029h,070h,0e0h
+DB 00fh,029h,078h,0f0h
+DB 044h,00fh,029h,000h
+DB 044h,00fh,029h,048h,010h
+DB 044h,00fh,029h,050h,020h
+DB 044h,00fh,029h,058h,030h
+DB 044h,00fh,029h,060h,040h
+DB 044h,00fh,029h,068h,050h
+DB 044h,00fh,029h,070h,060h
+DB 044h,00fh,029h,078h,070h
+ movdqa xmm10,XMMWORD PTR[$L$bswap_mask]
+
+ movdqu xmm0,XMMWORD PTR[rcx]
+ movdqu xmm2,XMMWORD PTR[rdx]
+ movdqu xmm7,XMMWORD PTR[32+rdx]
+DB 102,65,15,56,0,194
+
+ sub r9,010h
+ jz $L$odd_tail
+
+ movdqu xmm6,XMMWORD PTR[16+rdx]
+ mov eax,DWORD PTR[((OPENSSL_ia32cap_P+4))]
+ cmp r9,030h
+ jb $L$skip4x
+
+ and eax,71303168
+ cmp eax,4194304
+ je $L$skip4x
+
+ sub r9,030h
+ mov rax,0A040608020C0E000h
+ movdqu xmm14,XMMWORD PTR[48+rdx]
+ movdqu xmm15,XMMWORD PTR[64+rdx]
+
+
+
+
+ movdqu xmm3,XMMWORD PTR[48+r8]
+ movdqu xmm11,XMMWORD PTR[32+r8]
+DB 102,65,15,56,0,218
+DB 102,69,15,56,0,218
+ movdqa xmm5,xmm3
+ pshufd xmm4,xmm3,78
+ pxor xmm4,xmm3
+DB 102,15,58,68,218,0
+DB 102,15,58,68,234,17
+DB 102,15,58,68,231,0
+
+ movdqa xmm13,xmm11
+ pshufd xmm12,xmm11,78
+ pxor xmm12,xmm11
+DB 102,68,15,58,68,222,0
+DB 102,68,15,58,68,238,17
+DB 102,68,15,58,68,231,16
+ xorps xmm3,xmm11
+ xorps xmm5,xmm13
+ movups xmm7,XMMWORD PTR[80+rdx]
+ xorps xmm4,xmm12
+
+ movdqu xmm11,XMMWORD PTR[16+r8]
+ movdqu xmm8,XMMWORD PTR[r8]
+DB 102,69,15,56,0,218
+DB 102,69,15,56,0,194
+ movdqa xmm13,xmm11
+ pshufd xmm12,xmm11,78
+ pxor xmm0,xmm8
+ pxor xmm12,xmm11
+DB 102,69,15,58,68,222,0
+ movdqa xmm1,xmm0
+ pshufd xmm8,xmm0,78
+ pxor xmm8,xmm0
+DB 102,69,15,58,68,238,17
+DB 102,68,15,58,68,231,0
+ xorps xmm3,xmm11
+ xorps xmm5,xmm13
+
+ lea r8,QWORD PTR[64+r8]
+ sub r9,040h
+ jc $L$tail4x
+
+ jmp $L$mod4_loop
+ALIGN 32
+$L$mod4_loop::
+DB 102,65,15,58,68,199,0
+ xorps xmm4,xmm12
+ movdqu xmm11,XMMWORD PTR[48+r8]
+DB 102,69,15,56,0,218
+DB 102,65,15,58,68,207,17
+ xorps xmm0,xmm3
+ movdqu xmm3,XMMWORD PTR[32+r8]
+ movdqa xmm13,xmm11
+DB 102,68,15,58,68,199,16
+ pshufd xmm12,xmm11,78
+ xorps xmm1,xmm5
+ pxor xmm12,xmm11
+DB 102,65,15,56,0,218
+ movups xmm7,XMMWORD PTR[32+rdx]
+ xorps xmm8,xmm4
+DB 102,68,15,58,68,218,0
+ pshufd xmm4,xmm3,78
+
+ pxor xmm8,xmm0
+ movdqa xmm5,xmm3
+ pxor xmm8,xmm1
+ pxor xmm4,xmm3
+ movdqa xmm9,xmm8
+DB 102,68,15,58,68,234,17
+ pslldq xmm8,8
+ psrldq xmm9,8
+ pxor xmm0,xmm8
+ movdqa xmm8,XMMWORD PTR[$L$7_mask]
+ pxor xmm1,xmm9
+DB 102,76,15,110,200
+
+ pand xmm8,xmm0
+DB 102,69,15,56,0,200
+ pxor xmm9,xmm0
+DB 102,68,15,58,68,231,0
+ psllq xmm9,57
+ movdqa xmm8,xmm9
+ pslldq xmm9,8
+DB 102,15,58,68,222,0
+ psrldq xmm8,8
+ pxor xmm0,xmm9
+ pxor xmm1,xmm8
+ movdqu xmm8,XMMWORD PTR[r8]
+
+ movdqa xmm9,xmm0
+ psrlq xmm0,1
+DB 102,15,58,68,238,17
+ xorps xmm3,xmm11
+ movdqu xmm11,XMMWORD PTR[16+r8]
+DB 102,69,15,56,0,218
+DB 102,15,58,68,231,16
+ xorps xmm5,xmm13
+ movups xmm7,XMMWORD PTR[80+rdx]
+DB 102,69,15,56,0,194
+ pxor xmm1,xmm9
+ pxor xmm9,xmm0
+ psrlq xmm0,5
+
+ movdqa xmm13,xmm11
+ pxor xmm4,xmm12
+ pshufd xmm12,xmm11,78
+ pxor xmm0,xmm9
+ pxor xmm1,xmm8
+ pxor xmm12,xmm11
+DB 102,69,15,58,68,222,0
+ psrlq xmm0,1
+ pxor xmm0,xmm1
+ movdqa xmm1,xmm0
+DB 102,69,15,58,68,238,17
+ xorps xmm3,xmm11
+ pshufd xmm8,xmm0,78
+ pxor xmm8,xmm0
+
+DB 102,68,15,58,68,231,0
+ xorps xmm5,xmm13
+
+ lea r8,QWORD PTR[64+r8]
+ sub r9,040h
+ jnc $L$mod4_loop
+
+$L$tail4x::
+DB 102,65,15,58,68,199,0
+DB 102,65,15,58,68,207,17
+DB 102,68,15,58,68,199,16
+ xorps xmm4,xmm12
+ xorps xmm0,xmm3
+ xorps xmm1,xmm5
+ pxor xmm1,xmm0
+ pxor xmm8,xmm4
+
+ pxor xmm8,xmm1
+ pxor xmm1,xmm0
+
+ movdqa xmm9,xmm8
+ psrldq xmm8,8
+ pslldq xmm9,8
+ pxor xmm1,xmm8
+ pxor xmm0,xmm9
+
+ movdqa xmm4,xmm0
+ movdqa xmm3,xmm0
+ psllq xmm0,5
+ pxor xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm3,xmm0
+ pslldq xmm0,8
+ psrldq xmm3,8
+ pxor xmm0,xmm4
+ pxor xmm1,xmm3
+
+
+ movdqa xmm4,xmm0
+ psrlq xmm0,1
+ pxor xmm1,xmm4
+ pxor xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm1
+ add r9,040h
+ jz $L$done
+ movdqu xmm7,XMMWORD PTR[32+rdx]
+ sub r9,010h
+ jz $L$odd_tail
+$L$skip4x::
+
+
+
+
+
+ movdqu xmm8,XMMWORD PTR[r8]
+ movdqu xmm3,XMMWORD PTR[16+r8]
+DB 102,69,15,56,0,194
+DB 102,65,15,56,0,218
+ pxor xmm0,xmm8
+
+ movdqa xmm5,xmm3
+ pshufd xmm4,xmm3,78
+ pxor xmm4,xmm3
+DB 102,15,58,68,218,0
+DB 102,15,58,68,234,17
+DB 102,15,58,68,231,0
+
+ lea r8,QWORD PTR[32+r8]
+ nop
+ sub r9,020h
+ jbe $L$even_tail
+ nop
+ jmp $L$mod_loop
+
+ALIGN 32
+$L$mod_loop::
+ movdqa xmm1,xmm0
+ movdqa xmm8,xmm4
+ pshufd xmm4,xmm0,78
+ pxor xmm4,xmm0
+
+DB 102,15,58,68,198,0
+DB 102,15,58,68,206,17
+DB 102,15,58,68,231,16
+
+ pxor xmm0,xmm3
+ pxor xmm1,xmm5
+ movdqu xmm9,XMMWORD PTR[r8]
+ pxor xmm8,xmm0
+DB 102,69,15,56,0,202
+ movdqu xmm3,XMMWORD PTR[16+r8]
+
+ pxor xmm8,xmm1
+ pxor xmm1,xmm9
+ pxor xmm4,xmm8
+DB 102,65,15,56,0,218
+ movdqa xmm8,xmm4
+ psrldq xmm8,8
+ pslldq xmm4,8
+ pxor xmm1,xmm8
+ pxor xmm0,xmm4
+
+ movdqa xmm5,xmm3
+
+ movdqa xmm9,xmm0
+ movdqa xmm8,xmm0
+ psllq xmm0,5
+ pxor xmm8,xmm0
+DB 102,15,58,68,218,0
+ psllq xmm0,1
+ pxor xmm0,xmm8
+ psllq xmm0,57
+ movdqa xmm8,xmm0
+ pslldq xmm0,8
+ psrldq xmm8,8
+ pxor xmm0,xmm9
+ pshufd xmm4,xmm5,78
+ pxor xmm1,xmm8
+ pxor xmm4,xmm5
+
+ movdqa xmm9,xmm0
+ psrlq xmm0,1
+DB 102,15,58,68,234,17
+ pxor xmm1,xmm9
+ pxor xmm9,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm9
+ lea r8,QWORD PTR[32+r8]
+ psrlq xmm0,1
+DB 102,15,58,68,231,0
+ pxor xmm0,xmm1
+
+ sub r9,020h
+ ja $L$mod_loop
+
+$L$even_tail::
+ movdqa xmm1,xmm0
+ movdqa xmm8,xmm4
+ pshufd xmm4,xmm0,78
+ pxor xmm4,xmm0
+
+DB 102,15,58,68,198,0
+DB 102,15,58,68,206,17
+DB 102,15,58,68,231,16
+
+ pxor xmm0,xmm3
+ pxor xmm1,xmm5
+ pxor xmm8,xmm0
+ pxor xmm8,xmm1
+ pxor xmm4,xmm8
+ movdqa xmm8,xmm4
+ psrldq xmm8,8
+ pslldq xmm4,8
+ pxor xmm1,xmm8
+ pxor xmm0,xmm4
+
+ movdqa xmm4,xmm0
+ movdqa xmm3,xmm0
+ psllq xmm0,5
+ pxor xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm3,xmm0
+ pslldq xmm0,8
+ psrldq xmm3,8
+ pxor xmm0,xmm4
+ pxor xmm1,xmm3
+
+
+ movdqa xmm4,xmm0
+ psrlq xmm0,1
+ pxor xmm1,xmm4
+ pxor xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm1
+ test r9,r9
+ jnz $L$done
+
+$L$odd_tail::
+ movdqu xmm8,XMMWORD PTR[r8]
+DB 102,69,15,56,0,194
+ pxor xmm0,xmm8
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pxor xmm3,xmm0
+DB 102,15,58,68,194,0
+DB 102,15,58,68,202,17
+DB 102,15,58,68,223,0
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3
+ pxor xmm0,xmm4
+
+ movdqa xmm4,xmm0
+ movdqa xmm3,xmm0
+ psllq xmm0,5
+ pxor xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm3,xmm0
+ pslldq xmm0,8
+ psrldq xmm3,8
+ pxor xmm0,xmm4
+ pxor xmm1,xmm3
+
+
+ movdqa xmm4,xmm0
+ psrlq xmm0,1
+ pxor xmm1,xmm4
+ pxor xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm1
+$L$done::
+DB 102,65,15,56,0,194
+ movdqu XMMWORD PTR[rcx],xmm0
+ movaps xmm6,XMMWORD PTR[rsp]
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ movaps xmm8,XMMWORD PTR[32+rsp]
+ movaps xmm9,XMMWORD PTR[48+rsp]
+ movaps xmm10,XMMWORD PTR[64+rsp]
+ movaps xmm11,XMMWORD PTR[80+rsp]
+ movaps xmm12,XMMWORD PTR[96+rsp]
+ movaps xmm13,XMMWORD PTR[112+rsp]
+ movaps xmm14,XMMWORD PTR[128+rsp]
+ movaps xmm15,XMMWORD PTR[144+rsp]
+ lea rsp,QWORD PTR[168+rsp]
+$L$SEH_end_gcm_ghash_clmul::
+ DB 0F3h,0C3h ;repret
+gcm_ghash_clmul ENDP
+PUBLIC gcm_init_avx
+
+ALIGN 32
+gcm_init_avx PROC PUBLIC
+ jmp $L$_init_clmul
+gcm_init_avx ENDP
+PUBLIC gcm_gmult_avx
+
+ALIGN 32
+gcm_gmult_avx PROC PUBLIC
+ jmp $L$_gmult_clmul
+gcm_gmult_avx ENDP
+PUBLIC gcm_ghash_avx
+
+ALIGN 32
+gcm_ghash_avx PROC PUBLIC
+ jmp $L$_ghash_clmul
+gcm_ghash_avx ENDP
+ALIGN 64
+$L$bswap_mask::
+DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+$L$0x1c2_polynomial::
+DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0c2h
+$L$7_mask::
+ DD 7,0,7,0
+$L$7_mask_poly::
+ DD 7,0,450,0
+ALIGN 64
+
+$L$rem_4bit::
+ DD 0,0,0,471859200,0,943718400,0,610271232
+ DD 0,1887436800,0,1822425088,0,1220542464,0,1423966208
+ DD 0,3774873600,0,4246732800,0,3644850176,0,3311403008
+ DD 0,2441084928,0,2376073216,0,2847932416,0,3051356160
+
+$L$rem_8bit::
+ DW 00000h,001C2h,00384h,00246h,00708h,006CAh,0048Ch,0054Eh
+ DW 00E10h,00FD2h,00D94h,00C56h,00918h,008DAh,00A9Ch,00B5Eh
+ DW 01C20h,01DE2h,01FA4h,01E66h,01B28h,01AEAh,018ACh,0196Eh
+ DW 01230h,013F2h,011B4h,01076h,01538h,014FAh,016BCh,0177Eh
+ DW 03840h,03982h,03BC4h,03A06h,03F48h,03E8Ah,03CCCh,03D0Eh
+ DW 03650h,03792h,035D4h,03416h,03158h,0309Ah,032DCh,0331Eh
+ DW 02460h,025A2h,027E4h,02626h,02368h,022AAh,020ECh,0212Eh
+ DW 02A70h,02BB2h,029F4h,02836h,02D78h,02CBAh,02EFCh,02F3Eh
+ DW 07080h,07142h,07304h,072C6h,07788h,0764Ah,0740Ch,075CEh
+ DW 07E90h,07F52h,07D14h,07CD6h,07998h,0785Ah,07A1Ch,07BDEh
+ DW 06CA0h,06D62h,06F24h,06EE6h,06BA8h,06A6Ah,0682Ch,069EEh
+ DW 062B0h,06372h,06134h,060F6h,065B8h,0647Ah,0663Ch,067FEh
+ DW 048C0h,04902h,04B44h,04A86h,04FC8h,04E0Ah,04C4Ch,04D8Eh
+ DW 046D0h,04712h,04554h,04496h,041D8h,0401Ah,0425Ch,0439Eh
+ DW 054E0h,05522h,05764h,056A6h,053E8h,0522Ah,0506Ch,051AEh
+ DW 05AF0h,05B32h,05974h,058B6h,05DF8h,05C3Ah,05E7Ch,05FBEh
+ DW 0E100h,0E0C2h,0E284h,0E346h,0E608h,0E7CAh,0E58Ch,0E44Eh
+ DW 0EF10h,0EED2h,0EC94h,0ED56h,0E818h,0E9DAh,0EB9Ch,0EA5Eh
+ DW 0FD20h,0FCE2h,0FEA4h,0FF66h,0FA28h,0FBEAh,0F9ACh,0F86Eh
+ DW 0F330h,0F2F2h,0F0B4h,0F176h,0F438h,0F5FAh,0F7BCh,0F67Eh
+ DW 0D940h,0D882h,0DAC4h,0DB06h,0DE48h,0DF8Ah,0DDCCh,0DC0Eh
+ DW 0D750h,0D692h,0D4D4h,0D516h,0D058h,0D19Ah,0D3DCh,0D21Eh
+ DW 0C560h,0C4A2h,0C6E4h,0C726h,0C268h,0C3AAh,0C1ECh,0C02Eh
+ DW 0CB70h,0CAB2h,0C8F4h,0C936h,0CC78h,0CDBAh,0CFFCh,0CE3Eh
+ DW 09180h,09042h,09204h,093C6h,09688h,0974Ah,0950Ch,094CEh
+ DW 09F90h,09E52h,09C14h,09DD6h,09898h,0995Ah,09B1Ch,09ADEh
+ DW 08DA0h,08C62h,08E24h,08FE6h,08AA8h,08B6Ah,0892Ch,088EEh
+ DW 083B0h,08272h,08034h,081F6h,084B8h,0857Ah,0873Ch,086FEh
+ DW 0A9C0h,0A802h,0AA44h,0AB86h,0AEC8h,0AF0Ah,0AD4Ch,0AC8Eh
+ DW 0A7D0h,0A612h,0A454h,0A596h,0A0D8h,0A11Ah,0A35Ch,0A29Eh
+ DW 0B5E0h,0B422h,0B664h,0B7A6h,0B2E8h,0B32Ah,0B16Ch,0B0AEh
+ DW 0BBF0h,0BA32h,0B874h,0B9B6h,0BCF8h,0BD3Ah,0BF7Ch,0BEBEh
+
+DB 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
+DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
+DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
+DB 114,103,62,0
+ALIGN 64
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$in_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_prologue
+
+ lea rax,QWORD PTR[24+rax]
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+
+$L$in_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_gcm_gmult_4bit
+ DD imagerel $L$SEH_end_gcm_gmult_4bit
+ DD imagerel $L$SEH_info_gcm_gmult_4bit
+
+ DD imagerel $L$SEH_begin_gcm_ghash_4bit
+ DD imagerel $L$SEH_end_gcm_ghash_4bit
+ DD imagerel $L$SEH_info_gcm_ghash_4bit
+
+ DD imagerel $L$SEH_begin_gcm_init_clmul
+ DD imagerel $L$SEH_end_gcm_init_clmul
+ DD imagerel $L$SEH_info_gcm_init_clmul
+
+ DD imagerel $L$SEH_begin_gcm_ghash_clmul
+ DD imagerel $L$SEH_end_gcm_ghash_clmul
+ DD imagerel $L$SEH_info_gcm_ghash_clmul
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_gcm_gmult_4bit::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$gmult_prologue,imagerel $L$gmult_epilogue
+$L$SEH_info_gcm_ghash_4bit::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$ghash_prologue,imagerel $L$ghash_epilogue
+$L$SEH_info_gcm_init_clmul::
+DB 001h,008h,003h,000h
+DB 008h,068h,000h,000h
+DB 004h,022h,000h,000h
+$L$SEH_info_gcm_ghash_clmul::
+DB 001h,033h,016h,000h
+DB 033h,0f8h,009h,000h
+DB 02eh,0e8h,008h,000h
+DB 029h,0d8h,007h,000h
+DB 024h,0c8h,006h,000h
+DB 01fh,0b8h,005h,000h
+DB 01ah,0a8h,004h,000h
+DB 015h,098h,003h,000h
+DB 010h,088h,002h,000h
+DB 00ch,078h,001h,000h
+DB 008h,068h,000h,000h
+DB 004h,001h,015h,000h
+
+.xdata ENDS
+END
diff --git a/win-x86_64/crypto/rc4/rc4-md5-x86_64.asm b/win-x86_64/crypto/rc4/rc4-md5-x86_64.asm
new file mode 100644
index 0000000..9d823ae
--- /dev/null
+++ b/win-x86_64/crypto/rc4/rc4-md5-x86_64.asm
@@ -0,0 +1,1374 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+ALIGN 16
+
+PUBLIC rc4_md5_enc
+
+rc4_md5_enc PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_rc4_md5_enc::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ cmp r9,0
+ je $L$abort
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp,40
+$L$body::
+ mov r11,rcx
+ mov r12,r9
+ mov r13,rsi
+ mov r14,rdx
+ mov r15,r8
+ xor rbp,rbp
+ xor rcx,rcx
+
+ lea rdi,QWORD PTR[8+rdi]
+ mov bpl,BYTE PTR[((-8))+rdi]
+ mov cl,BYTE PTR[((-4))+rdi]
+
+ inc bpl
+ sub r14,r13
+ mov eax,DWORD PTR[rbp*4+rdi]
+ add cl,al
+ lea rsi,QWORD PTR[rbp*4+rdi]
+ shl r12,6
+ add r12,r15
+ mov QWORD PTR[16+rsp],r12
+
+ mov QWORD PTR[24+rsp],r11
+ mov r8d,DWORD PTR[r11]
+ mov r9d,DWORD PTR[4+r11]
+ mov r10d,DWORD PTR[8+r11]
+ mov r11d,DWORD PTR[12+r11]
+ jmp $L$oop
+
+ALIGN 16
+$L$oop::
+ mov DWORD PTR[rsp],r8d
+ mov DWORD PTR[4+rsp],r9d
+ mov DWORD PTR[8+rsp],r10d
+ mov r12d,r11d
+ mov DWORD PTR[12+rsp],r11d
+ pxor xmm0,xmm0
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r10d
+ mov DWORD PTR[rcx*4+rdi],eax
+ and r12d,r9d
+ add r8d,DWORD PTR[r15]
+ add al,dl
+ mov ebx,DWORD PTR[4+rsi]
+ add r8d,3614090360
+ xor r12d,r11d
+ movzx eax,al
+ mov DWORD PTR[rsi],edx
+ add r8d,r12d
+ add cl,bl
+ rol r8d,7
+ mov r12d,r10d
+ movd xmm0,DWORD PTR[rax*4+rdi]
+
+ add r8d,r9d
+ pxor xmm1,xmm1
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r9d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ and r12d,r8d
+ add r11d,DWORD PTR[4+r15]
+ add bl,dl
+ mov eax,DWORD PTR[8+rsi]
+ add r11d,3905402710
+ xor r12d,r10d
+ movzx ebx,bl
+ mov DWORD PTR[4+rsi],edx
+ add r11d,r12d
+ add cl,al
+ rol r11d,12
+ mov r12d,r9d
+ movd xmm1,DWORD PTR[rbx*4+rdi]
+
+ add r11d,r8d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r8d
+ mov DWORD PTR[rcx*4+rdi],eax
+ and r12d,r11d
+ add r10d,DWORD PTR[8+r15]
+ add al,dl
+ mov ebx,DWORD PTR[12+rsi]
+ add r10d,606105819
+ xor r12d,r9d
+ movzx eax,al
+ mov DWORD PTR[8+rsi],edx
+ add r10d,r12d
+ add cl,bl
+ rol r10d,17
+ mov r12d,r8d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],1
+
+ add r10d,r11d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r11d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ and r12d,r10d
+ add r9d,DWORD PTR[12+r15]
+ add bl,dl
+ mov eax,DWORD PTR[16+rsi]
+ add r9d,3250441966
+ xor r12d,r8d
+ movzx ebx,bl
+ mov DWORD PTR[12+rsi],edx
+ add r9d,r12d
+ add cl,al
+ rol r9d,22
+ mov r12d,r11d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],1
+
+ add r9d,r10d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r10d
+ mov DWORD PTR[rcx*4+rdi],eax
+ and r12d,r9d
+ add r8d,DWORD PTR[16+r15]
+ add al,dl
+ mov ebx,DWORD PTR[20+rsi]
+ add r8d,4118548399
+ xor r12d,r11d
+ movzx eax,al
+ mov DWORD PTR[16+rsi],edx
+ add r8d,r12d
+ add cl,bl
+ rol r8d,7
+ mov r12d,r10d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],2
+
+ add r8d,r9d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r9d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ and r12d,r8d
+ add r11d,DWORD PTR[20+r15]
+ add bl,dl
+ mov eax,DWORD PTR[24+rsi]
+ add r11d,1200080426
+ xor r12d,r10d
+ movzx ebx,bl
+ mov DWORD PTR[20+rsi],edx
+ add r11d,r12d
+ add cl,al
+ rol r11d,12
+ mov r12d,r9d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],2
+
+ add r11d,r8d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r8d
+ mov DWORD PTR[rcx*4+rdi],eax
+ and r12d,r11d
+ add r10d,DWORD PTR[24+r15]
+ add al,dl
+ mov ebx,DWORD PTR[28+rsi]
+ add r10d,2821735955
+ xor r12d,r9d
+ movzx eax,al
+ mov DWORD PTR[24+rsi],edx
+ add r10d,r12d
+ add cl,bl
+ rol r10d,17
+ mov r12d,r8d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],3
+
+ add r10d,r11d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r11d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ and r12d,r10d
+ add r9d,DWORD PTR[28+r15]
+ add bl,dl
+ mov eax,DWORD PTR[32+rsi]
+ add r9d,4249261313
+ xor r12d,r8d
+ movzx ebx,bl
+ mov DWORD PTR[28+rsi],edx
+ add r9d,r12d
+ add cl,al
+ rol r9d,22
+ mov r12d,r11d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],3
+
+ add r9d,r10d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r10d
+ mov DWORD PTR[rcx*4+rdi],eax
+ and r12d,r9d
+ add r8d,DWORD PTR[32+r15]
+ add al,dl
+ mov ebx,DWORD PTR[36+rsi]
+ add r8d,1770035416
+ xor r12d,r11d
+ movzx eax,al
+ mov DWORD PTR[32+rsi],edx
+ add r8d,r12d
+ add cl,bl
+ rol r8d,7
+ mov r12d,r10d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],4
+
+ add r8d,r9d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r9d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ and r12d,r8d
+ add r11d,DWORD PTR[36+r15]
+ add bl,dl
+ mov eax,DWORD PTR[40+rsi]
+ add r11d,2336552879
+ xor r12d,r10d
+ movzx ebx,bl
+ mov DWORD PTR[36+rsi],edx
+ add r11d,r12d
+ add cl,al
+ rol r11d,12
+ mov r12d,r9d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],4
+
+ add r11d,r8d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r8d
+ mov DWORD PTR[rcx*4+rdi],eax
+ and r12d,r11d
+ add r10d,DWORD PTR[40+r15]
+ add al,dl
+ mov ebx,DWORD PTR[44+rsi]
+ add r10d,4294925233
+ xor r12d,r9d
+ movzx eax,al
+ mov DWORD PTR[40+rsi],edx
+ add r10d,r12d
+ add cl,bl
+ rol r10d,17
+ mov r12d,r8d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],5
+
+ add r10d,r11d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r11d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ and r12d,r10d
+ add r9d,DWORD PTR[44+r15]
+ add bl,dl
+ mov eax,DWORD PTR[48+rsi]
+ add r9d,2304563134
+ xor r12d,r8d
+ movzx ebx,bl
+ mov DWORD PTR[44+rsi],edx
+ add r9d,r12d
+ add cl,al
+ rol r9d,22
+ mov r12d,r11d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],5
+
+ add r9d,r10d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r10d
+ mov DWORD PTR[rcx*4+rdi],eax
+ and r12d,r9d
+ add r8d,DWORD PTR[48+r15]
+ add al,dl
+ mov ebx,DWORD PTR[52+rsi]
+ add r8d,1804603682
+ xor r12d,r11d
+ movzx eax,al
+ mov DWORD PTR[48+rsi],edx
+ add r8d,r12d
+ add cl,bl
+ rol r8d,7
+ mov r12d,r10d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],6
+
+ add r8d,r9d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r9d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ and r12d,r8d
+ add r11d,DWORD PTR[52+r15]
+ add bl,dl
+ mov eax,DWORD PTR[56+rsi]
+ add r11d,4254626195
+ xor r12d,r10d
+ movzx ebx,bl
+ mov DWORD PTR[52+rsi],edx
+ add r11d,r12d
+ add cl,al
+ rol r11d,12
+ mov r12d,r9d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],6
+
+ add r11d,r8d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r8d
+ mov DWORD PTR[rcx*4+rdi],eax
+ and r12d,r11d
+ add r10d,DWORD PTR[56+r15]
+ add al,dl
+ mov ebx,DWORD PTR[60+rsi]
+ add r10d,2792965006
+ xor r12d,r9d
+ movzx eax,al
+ mov DWORD PTR[56+rsi],edx
+ add r10d,r12d
+ add cl,bl
+ rol r10d,17
+ mov r12d,r8d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],7
+
+ add r10d,r11d
+ movdqu xmm2,XMMWORD PTR[r13]
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r11d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ and r12d,r10d
+ add r9d,DWORD PTR[60+r15]
+ add bl,dl
+ mov eax,DWORD PTR[64+rsi]
+ add r9d,1236535329
+ xor r12d,r8d
+ movzx ebx,bl
+ mov DWORD PTR[60+rsi],edx
+ add r9d,r12d
+ add cl,al
+ rol r9d,22
+ mov r12d,r10d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],7
+
+ add r9d,r10d
+ psllq xmm1,8
+ pxor xmm2,xmm0
+ pxor xmm2,xmm1
+ pxor xmm0,xmm0
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r9d
+ mov DWORD PTR[rcx*4+rdi],eax
+ and r12d,r11d
+ add r8d,DWORD PTR[4+r15]
+ add al,dl
+ mov ebx,DWORD PTR[68+rsi]
+ add r8d,4129170786
+ xor r12d,r10d
+ movzx eax,al
+ mov DWORD PTR[64+rsi],edx
+ add r8d,r12d
+ add cl,bl
+ rol r8d,5
+ mov r12d,r9d
+ movd xmm0,DWORD PTR[rax*4+rdi]
+
+ add r8d,r9d
+ pxor xmm1,xmm1
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r8d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ and r12d,r10d
+ add r11d,DWORD PTR[24+r15]
+ add bl,dl
+ mov eax,DWORD PTR[72+rsi]
+ add r11d,3225465664
+ xor r12d,r9d
+ movzx ebx,bl
+ mov DWORD PTR[68+rsi],edx
+ add r11d,r12d
+ add cl,al
+ rol r11d,9
+ mov r12d,r8d
+ movd xmm1,DWORD PTR[rbx*4+rdi]
+
+ add r11d,r8d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r11d
+ mov DWORD PTR[rcx*4+rdi],eax
+ and r12d,r9d
+ add r10d,DWORD PTR[44+r15]
+ add al,dl
+ mov ebx,DWORD PTR[76+rsi]
+ add r10d,643717713
+ xor r12d,r8d
+ movzx eax,al
+ mov DWORD PTR[72+rsi],edx
+ add r10d,r12d
+ add cl,bl
+ rol r10d,14
+ mov r12d,r11d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],1
+
+ add r10d,r11d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r10d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ and r12d,r8d
+ add r9d,DWORD PTR[r15]
+ add bl,dl
+ mov eax,DWORD PTR[80+rsi]
+ add r9d,3921069994
+ xor r12d,r11d
+ movzx ebx,bl
+ mov DWORD PTR[76+rsi],edx
+ add r9d,r12d
+ add cl,al
+ rol r9d,20
+ mov r12d,r10d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],1
+
+ add r9d,r10d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r9d
+ mov DWORD PTR[rcx*4+rdi],eax
+ and r12d,r11d
+ add r8d,DWORD PTR[20+r15]
+ add al,dl
+ mov ebx,DWORD PTR[84+rsi]
+ add r8d,3593408605
+ xor r12d,r10d
+ movzx eax,al
+ mov DWORD PTR[80+rsi],edx
+ add r8d,r12d
+ add cl,bl
+ rol r8d,5
+ mov r12d,r9d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],2
+
+ add r8d,r9d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r8d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ and r12d,r10d
+ add r11d,DWORD PTR[40+r15]
+ add bl,dl
+ mov eax,DWORD PTR[88+rsi]
+ add r11d,38016083
+ xor r12d,r9d
+ movzx ebx,bl
+ mov DWORD PTR[84+rsi],edx
+ add r11d,r12d
+ add cl,al
+ rol r11d,9
+ mov r12d,r8d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],2
+
+ add r11d,r8d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r11d
+ mov DWORD PTR[rcx*4+rdi],eax
+ and r12d,r9d
+ add r10d,DWORD PTR[60+r15]
+ add al,dl
+ mov ebx,DWORD PTR[92+rsi]
+ add r10d,3634488961
+ xor r12d,r8d
+ movzx eax,al
+ mov DWORD PTR[88+rsi],edx
+ add r10d,r12d
+ add cl,bl
+ rol r10d,14
+ mov r12d,r11d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],3
+
+ add r10d,r11d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r10d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ and r12d,r8d
+ add r9d,DWORD PTR[16+r15]
+ add bl,dl
+ mov eax,DWORD PTR[96+rsi]
+ add r9d,3889429448
+ xor r12d,r11d
+ movzx ebx,bl
+ mov DWORD PTR[92+rsi],edx
+ add r9d,r12d
+ add cl,al
+ rol r9d,20
+ mov r12d,r10d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],3
+
+ add r9d,r10d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r9d
+ mov DWORD PTR[rcx*4+rdi],eax
+ and r12d,r11d
+ add r8d,DWORD PTR[36+r15]
+ add al,dl
+ mov ebx,DWORD PTR[100+rsi]
+ add r8d,568446438
+ xor r12d,r10d
+ movzx eax,al
+ mov DWORD PTR[96+rsi],edx
+ add r8d,r12d
+ add cl,bl
+ rol r8d,5
+ mov r12d,r9d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],4
+
+ add r8d,r9d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r8d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ and r12d,r10d
+ add r11d,DWORD PTR[56+r15]
+ add bl,dl
+ mov eax,DWORD PTR[104+rsi]
+ add r11d,3275163606
+ xor r12d,r9d
+ movzx ebx,bl
+ mov DWORD PTR[100+rsi],edx
+ add r11d,r12d
+ add cl,al
+ rol r11d,9
+ mov r12d,r8d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],4
+
+ add r11d,r8d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r11d
+ mov DWORD PTR[rcx*4+rdi],eax
+ and r12d,r9d
+ add r10d,DWORD PTR[12+r15]
+ add al,dl
+ mov ebx,DWORD PTR[108+rsi]
+ add r10d,4107603335
+ xor r12d,r8d
+ movzx eax,al
+ mov DWORD PTR[104+rsi],edx
+ add r10d,r12d
+ add cl,bl
+ rol r10d,14
+ mov r12d,r11d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],5
+
+ add r10d,r11d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r10d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ and r12d,r8d
+ add r9d,DWORD PTR[32+r15]
+ add bl,dl
+ mov eax,DWORD PTR[112+rsi]
+ add r9d,1163531501
+ xor r12d,r11d
+ movzx ebx,bl
+ mov DWORD PTR[108+rsi],edx
+ add r9d,r12d
+ add cl,al
+ rol r9d,20
+ mov r12d,r10d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],5
+
+ add r9d,r10d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r9d
+ mov DWORD PTR[rcx*4+rdi],eax
+ and r12d,r11d
+ add r8d,DWORD PTR[52+r15]
+ add al,dl
+ mov ebx,DWORD PTR[116+rsi]
+ add r8d,2850285829
+ xor r12d,r10d
+ movzx eax,al
+ mov DWORD PTR[112+rsi],edx
+ add r8d,r12d
+ add cl,bl
+ rol r8d,5
+ mov r12d,r9d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],6
+
+ add r8d,r9d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r8d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ and r12d,r10d
+ add r11d,DWORD PTR[8+r15]
+ add bl,dl
+ mov eax,DWORD PTR[120+rsi]
+ add r11d,4243563512
+ xor r12d,r9d
+ movzx ebx,bl
+ mov DWORD PTR[116+rsi],edx
+ add r11d,r12d
+ add cl,al
+ rol r11d,9
+ mov r12d,r8d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],6
+
+ add r11d,r8d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r11d
+ mov DWORD PTR[rcx*4+rdi],eax
+ and r12d,r9d
+ add r10d,DWORD PTR[28+r15]
+ add al,dl
+ mov ebx,DWORD PTR[124+rsi]
+ add r10d,1735328473
+ xor r12d,r8d
+ movzx eax,al
+ mov DWORD PTR[120+rsi],edx
+ add r10d,r12d
+ add cl,bl
+ rol r10d,14
+ mov r12d,r11d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],7
+
+ add r10d,r11d
+ movdqu xmm3,XMMWORD PTR[16+r13]
+ add bpl,32
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r10d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ and r12d,r8d
+ add r9d,DWORD PTR[48+r15]
+ add bl,dl
+ mov eax,DWORD PTR[rbp*4+rdi]
+ add r9d,2368359562
+ xor r12d,r11d
+ movzx ebx,bl
+ mov DWORD PTR[124+rsi],edx
+ add r9d,r12d
+ add cl,al
+ rol r9d,20
+ mov r12d,r11d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],7
+
+ add r9d,r10d
+ mov rsi,rcx
+ xor rcx,rcx
+ mov cl,sil
+ lea rsi,QWORD PTR[rbp*4+rdi]
+ psllq xmm1,8
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+ pxor xmm0,xmm0
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r10d
+ mov DWORD PTR[rcx*4+rdi],eax
+ xor r12d,r9d
+ add r8d,DWORD PTR[20+r15]
+ add al,dl
+ mov ebx,DWORD PTR[4+rsi]
+ add r8d,4294588738
+ movzx eax,al
+ add r8d,r12d
+ mov DWORD PTR[rsi],edx
+ add cl,bl
+ rol r8d,4
+ mov r12d,r10d
+ movd xmm0,DWORD PTR[rax*4+rdi]
+
+ add r8d,r9d
+ pxor xmm1,xmm1
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r9d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ xor r12d,r8d
+ add r11d,DWORD PTR[32+r15]
+ add bl,dl
+ mov eax,DWORD PTR[8+rsi]
+ add r11d,2272392833
+ movzx ebx,bl
+ add r11d,r12d
+ mov DWORD PTR[4+rsi],edx
+ add cl,al
+ rol r11d,11
+ mov r12d,r9d
+ movd xmm1,DWORD PTR[rbx*4+rdi]
+
+ add r11d,r8d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r8d
+ mov DWORD PTR[rcx*4+rdi],eax
+ xor r12d,r11d
+ add r10d,DWORD PTR[44+r15]
+ add al,dl
+ mov ebx,DWORD PTR[12+rsi]
+ add r10d,1839030562
+ movzx eax,al
+ add r10d,r12d
+ mov DWORD PTR[8+rsi],edx
+ add cl,bl
+ rol r10d,16
+ mov r12d,r8d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],1
+
+ add r10d,r11d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r11d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ xor r12d,r10d
+ add r9d,DWORD PTR[56+r15]
+ add bl,dl
+ mov eax,DWORD PTR[16+rsi]
+ add r9d,4259657740
+ movzx ebx,bl
+ add r9d,r12d
+ mov DWORD PTR[12+rsi],edx
+ add cl,al
+ rol r9d,23
+ mov r12d,r11d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],1
+
+ add r9d,r10d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r10d
+ mov DWORD PTR[rcx*4+rdi],eax
+ xor r12d,r9d
+ add r8d,DWORD PTR[4+r15]
+ add al,dl
+ mov ebx,DWORD PTR[20+rsi]
+ add r8d,2763975236
+ movzx eax,al
+ add r8d,r12d
+ mov DWORD PTR[16+rsi],edx
+ add cl,bl
+ rol r8d,4
+ mov r12d,r10d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],2
+
+ add r8d,r9d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r9d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ xor r12d,r8d
+ add r11d,DWORD PTR[16+r15]
+ add bl,dl
+ mov eax,DWORD PTR[24+rsi]
+ add r11d,1272893353
+ movzx ebx,bl
+ add r11d,r12d
+ mov DWORD PTR[20+rsi],edx
+ add cl,al
+ rol r11d,11
+ mov r12d,r9d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],2
+
+ add r11d,r8d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r8d
+ mov DWORD PTR[rcx*4+rdi],eax
+ xor r12d,r11d
+ add r10d,DWORD PTR[28+r15]
+ add al,dl
+ mov ebx,DWORD PTR[28+rsi]
+ add r10d,4139469664
+ movzx eax,al
+ add r10d,r12d
+ mov DWORD PTR[24+rsi],edx
+ add cl,bl
+ rol r10d,16
+ mov r12d,r8d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],3
+
+ add r10d,r11d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r11d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ xor r12d,r10d
+ add r9d,DWORD PTR[40+r15]
+ add bl,dl
+ mov eax,DWORD PTR[32+rsi]
+ add r9d,3200236656
+ movzx ebx,bl
+ add r9d,r12d
+ mov DWORD PTR[28+rsi],edx
+ add cl,al
+ rol r9d,23
+ mov r12d,r11d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],3
+
+ add r9d,r10d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r10d
+ mov DWORD PTR[rcx*4+rdi],eax
+ xor r12d,r9d
+ add r8d,DWORD PTR[52+r15]
+ add al,dl
+ mov ebx,DWORD PTR[36+rsi]
+ add r8d,681279174
+ movzx eax,al
+ add r8d,r12d
+ mov DWORD PTR[32+rsi],edx
+ add cl,bl
+ rol r8d,4
+ mov r12d,r10d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],4
+
+ add r8d,r9d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r9d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ xor r12d,r8d
+ add r11d,DWORD PTR[r15]
+ add bl,dl
+ mov eax,DWORD PTR[40+rsi]
+ add r11d,3936430074
+ movzx ebx,bl
+ add r11d,r12d
+ mov DWORD PTR[36+rsi],edx
+ add cl,al
+ rol r11d,11
+ mov r12d,r9d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],4
+
+ add r11d,r8d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r8d
+ mov DWORD PTR[rcx*4+rdi],eax
+ xor r12d,r11d
+ add r10d,DWORD PTR[12+r15]
+ add al,dl
+ mov ebx,DWORD PTR[44+rsi]
+ add r10d,3572445317
+ movzx eax,al
+ add r10d,r12d
+ mov DWORD PTR[40+rsi],edx
+ add cl,bl
+ rol r10d,16
+ mov r12d,r8d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],5
+
+ add r10d,r11d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r11d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ xor r12d,r10d
+ add r9d,DWORD PTR[24+r15]
+ add bl,dl
+ mov eax,DWORD PTR[48+rsi]
+ add r9d,76029189
+ movzx ebx,bl
+ add r9d,r12d
+ mov DWORD PTR[44+rsi],edx
+ add cl,al
+ rol r9d,23
+ mov r12d,r11d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],5
+
+ add r9d,r10d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r10d
+ mov DWORD PTR[rcx*4+rdi],eax
+ xor r12d,r9d
+ add r8d,DWORD PTR[36+r15]
+ add al,dl
+ mov ebx,DWORD PTR[52+rsi]
+ add r8d,3654602809
+ movzx eax,al
+ add r8d,r12d
+ mov DWORD PTR[48+rsi],edx
+ add cl,bl
+ rol r8d,4
+ mov r12d,r10d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],6
+
+ add r8d,r9d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r9d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ xor r12d,r8d
+ add r11d,DWORD PTR[48+r15]
+ add bl,dl
+ mov eax,DWORD PTR[56+rsi]
+ add r11d,3873151461
+ movzx ebx,bl
+ add r11d,r12d
+ mov DWORD PTR[52+rsi],edx
+ add cl,al
+ rol r11d,11
+ mov r12d,r9d
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],6
+
+ add r11d,r8d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r8d
+ mov DWORD PTR[rcx*4+rdi],eax
+ xor r12d,r11d
+ add r10d,DWORD PTR[60+r15]
+ add al,dl
+ mov ebx,DWORD PTR[60+rsi]
+ add r10d,530742520
+ movzx eax,al
+ add r10d,r12d
+ mov DWORD PTR[56+rsi],edx
+ add cl,bl
+ rol r10d,16
+ mov r12d,r8d
+ pinsrw xmm0,WORD PTR[rax*4+rdi],7
+
+ add r10d,r11d
+ movdqu xmm4,XMMWORD PTR[32+r13]
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r11d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ xor r12d,r10d
+ add r9d,DWORD PTR[8+r15]
+ add bl,dl
+ mov eax,DWORD PTR[64+rsi]
+ add r9d,3299628645
+ movzx ebx,bl
+ add r9d,r12d
+ mov DWORD PTR[60+rsi],edx
+ add cl,al
+ rol r9d,23
+ mov r12d,-1
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],7
+
+ add r9d,r10d
+ psllq xmm1,8
+ pxor xmm4,xmm0
+ pxor xmm4,xmm1
+ pxor xmm0,xmm0
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r11d
+ mov DWORD PTR[rcx*4+rdi],eax
+ or r12d,r9d
+ add r8d,DWORD PTR[r15]
+ add al,dl
+ mov ebx,DWORD PTR[68+rsi]
+ add r8d,4096336452
+ movzx eax,al
+ xor r12d,r10d
+ mov DWORD PTR[64+rsi],edx
+ add r8d,r12d
+ add cl,bl
+ rol r8d,6
+ mov r12d,-1
+ movd xmm0,DWORD PTR[rax*4+rdi]
+
+ add r8d,r9d
+ pxor xmm1,xmm1
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r10d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ or r12d,r8d
+ add r11d,DWORD PTR[28+r15]
+ add bl,dl
+ mov eax,DWORD PTR[72+rsi]
+ add r11d,1126891415
+ movzx ebx,bl
+ xor r12d,r9d
+ mov DWORD PTR[68+rsi],edx
+ add r11d,r12d
+ add cl,al
+ rol r11d,10
+ mov r12d,-1
+ movd xmm1,DWORD PTR[rbx*4+rdi]
+
+ add r11d,r8d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r9d
+ mov DWORD PTR[rcx*4+rdi],eax
+ or r12d,r11d
+ add r10d,DWORD PTR[56+r15]
+ add al,dl
+ mov ebx,DWORD PTR[76+rsi]
+ add r10d,2878612391
+ movzx eax,al
+ xor r12d,r8d
+ mov DWORD PTR[72+rsi],edx
+ add r10d,r12d
+ add cl,bl
+ rol r10d,15
+ mov r12d,-1
+ pinsrw xmm0,WORD PTR[rax*4+rdi],1
+
+ add r10d,r11d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r8d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ or r12d,r10d
+ add r9d,DWORD PTR[20+r15]
+ add bl,dl
+ mov eax,DWORD PTR[80+rsi]
+ add r9d,4237533241
+ movzx ebx,bl
+ xor r12d,r11d
+ mov DWORD PTR[76+rsi],edx
+ add r9d,r12d
+ add cl,al
+ rol r9d,21
+ mov r12d,-1
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],1
+
+ add r9d,r10d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r11d
+ mov DWORD PTR[rcx*4+rdi],eax
+ or r12d,r9d
+ add r8d,DWORD PTR[48+r15]
+ add al,dl
+ mov ebx,DWORD PTR[84+rsi]
+ add r8d,1700485571
+ movzx eax,al
+ xor r12d,r10d
+ mov DWORD PTR[80+rsi],edx
+ add r8d,r12d
+ add cl,bl
+ rol r8d,6
+ mov r12d,-1
+ pinsrw xmm0,WORD PTR[rax*4+rdi],2
+
+ add r8d,r9d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r10d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ or r12d,r8d
+ add r11d,DWORD PTR[12+r15]
+ add bl,dl
+ mov eax,DWORD PTR[88+rsi]
+ add r11d,2399980690
+ movzx ebx,bl
+ xor r12d,r9d
+ mov DWORD PTR[84+rsi],edx
+ add r11d,r12d
+ add cl,al
+ rol r11d,10
+ mov r12d,-1
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],2
+
+ add r11d,r8d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r9d
+ mov DWORD PTR[rcx*4+rdi],eax
+ or r12d,r11d
+ add r10d,DWORD PTR[40+r15]
+ add al,dl
+ mov ebx,DWORD PTR[92+rsi]
+ add r10d,4293915773
+ movzx eax,al
+ xor r12d,r8d
+ mov DWORD PTR[88+rsi],edx
+ add r10d,r12d
+ add cl,bl
+ rol r10d,15
+ mov r12d,-1
+ pinsrw xmm0,WORD PTR[rax*4+rdi],3
+
+ add r10d,r11d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r8d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ or r12d,r10d
+ add r9d,DWORD PTR[4+r15]
+ add bl,dl
+ mov eax,DWORD PTR[96+rsi]
+ add r9d,2240044497
+ movzx ebx,bl
+ xor r12d,r11d
+ mov DWORD PTR[92+rsi],edx
+ add r9d,r12d
+ add cl,al
+ rol r9d,21
+ mov r12d,-1
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],3
+
+ add r9d,r10d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r11d
+ mov DWORD PTR[rcx*4+rdi],eax
+ or r12d,r9d
+ add r8d,DWORD PTR[32+r15]
+ add al,dl
+ mov ebx,DWORD PTR[100+rsi]
+ add r8d,1873313359
+ movzx eax,al
+ xor r12d,r10d
+ mov DWORD PTR[96+rsi],edx
+ add r8d,r12d
+ add cl,bl
+ rol r8d,6
+ mov r12d,-1
+ pinsrw xmm0,WORD PTR[rax*4+rdi],4
+
+ add r8d,r9d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r10d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ or r12d,r8d
+ add r11d,DWORD PTR[60+r15]
+ add bl,dl
+ mov eax,DWORD PTR[104+rsi]
+ add r11d,4264355552
+ movzx ebx,bl
+ xor r12d,r9d
+ mov DWORD PTR[100+rsi],edx
+ add r11d,r12d
+ add cl,al
+ rol r11d,10
+ mov r12d,-1
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],4
+
+ add r11d,r8d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r9d
+ mov DWORD PTR[rcx*4+rdi],eax
+ or r12d,r11d
+ add r10d,DWORD PTR[24+r15]
+ add al,dl
+ mov ebx,DWORD PTR[108+rsi]
+ add r10d,2734768916
+ movzx eax,al
+ xor r12d,r8d
+ mov DWORD PTR[104+rsi],edx
+ add r10d,r12d
+ add cl,bl
+ rol r10d,15
+ mov r12d,-1
+ pinsrw xmm0,WORD PTR[rax*4+rdi],5
+
+ add r10d,r11d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r8d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ or r12d,r10d
+ add r9d,DWORD PTR[52+r15]
+ add bl,dl
+ mov eax,DWORD PTR[112+rsi]
+ add r9d,1309151649
+ movzx ebx,bl
+ xor r12d,r11d
+ mov DWORD PTR[108+rsi],edx
+ add r9d,r12d
+ add cl,al
+ rol r9d,21
+ mov r12d,-1
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],5
+
+ add r9d,r10d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r11d
+ mov DWORD PTR[rcx*4+rdi],eax
+ or r12d,r9d
+ add r8d,DWORD PTR[16+r15]
+ add al,dl
+ mov ebx,DWORD PTR[116+rsi]
+ add r8d,4149444226
+ movzx eax,al
+ xor r12d,r10d
+ mov DWORD PTR[112+rsi],edx
+ add r8d,r12d
+ add cl,bl
+ rol r8d,6
+ mov r12d,-1
+ pinsrw xmm0,WORD PTR[rax*4+rdi],6
+
+ add r8d,r9d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r10d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ or r12d,r8d
+ add r11d,DWORD PTR[44+r15]
+ add bl,dl
+ mov eax,DWORD PTR[120+rsi]
+ add r11d,3174756917
+ movzx ebx,bl
+ xor r12d,r9d
+ mov DWORD PTR[116+rsi],edx
+ add r11d,r12d
+ add cl,al
+ rol r11d,10
+ mov r12d,-1
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],6
+
+ add r11d,r8d
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r9d
+ mov DWORD PTR[rcx*4+rdi],eax
+ or r12d,r11d
+ add r10d,DWORD PTR[8+r15]
+ add al,dl
+ mov ebx,DWORD PTR[124+rsi]
+ add r10d,718787259
+ movzx eax,al
+ xor r12d,r8d
+ mov DWORD PTR[120+rsi],edx
+ add r10d,r12d
+ add cl,bl
+ rol r10d,15
+ mov r12d,-1
+ pinsrw xmm0,WORD PTR[rax*4+rdi],7
+
+ add r10d,r11d
+ movdqu xmm5,XMMWORD PTR[48+r13]
+ add bpl,32
+ mov edx,DWORD PTR[rcx*4+rdi]
+ xor r12d,r8d
+ mov DWORD PTR[rcx*4+rdi],ebx
+ or r12d,r10d
+ add r9d,DWORD PTR[36+r15]
+ add bl,dl
+ mov eax,DWORD PTR[rbp*4+rdi]
+ add r9d,3951481745
+ movzx ebx,bl
+ xor r12d,r11d
+ mov DWORD PTR[124+rsi],edx
+ add r9d,r12d
+ add cl,al
+ rol r9d,21
+ mov r12d,-1
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],7
+
+ add r9d,r10d
+ mov rsi,rbp
+ xor rbp,rbp
+ mov bpl,sil
+ mov rsi,rcx
+ xor rcx,rcx
+ mov cl,sil
+ lea rsi,QWORD PTR[rbp*4+rdi]
+ psllq xmm1,8
+ pxor xmm5,xmm0
+ pxor xmm5,xmm1
+ add r8d,DWORD PTR[rsp]
+ add r9d,DWORD PTR[4+rsp]
+ add r10d,DWORD PTR[8+rsp]
+ add r11d,DWORD PTR[12+rsp]
+
+ movdqu XMMWORD PTR[r13*1+r14],xmm2
+ movdqu XMMWORD PTR[16+r13*1+r14],xmm3
+ movdqu XMMWORD PTR[32+r13*1+r14],xmm4
+ movdqu XMMWORD PTR[48+r13*1+r14],xmm5
+ lea r15,QWORD PTR[64+r15]
+ lea r13,QWORD PTR[64+r13]
+ cmp r15,QWORD PTR[16+rsp]
+ jb $L$oop
+
+ mov r12,QWORD PTR[24+rsp]
+ sub cl,al
+ mov DWORD PTR[r12],r8d
+ mov DWORD PTR[4+r12],r9d
+ mov DWORD PTR[8+r12],r10d
+ mov DWORD PTR[12+r12],r11d
+ sub bpl,1
+ mov DWORD PTR[((-8))+rdi],ebp
+ mov DWORD PTR[((-4))+rdi],ecx
+
+ mov r15,QWORD PTR[40+rsp]
+ mov r14,QWORD PTR[48+rsp]
+ mov r13,QWORD PTR[56+rsp]
+ mov r12,QWORD PTR[64+rsp]
+ mov rbp,QWORD PTR[72+rsp]
+ mov rbx,QWORD PTR[80+rsp]
+ lea rsp,QWORD PTR[88+rsp]
+$L$epilogue::
+$L$abort::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_rc4_md5_enc::
+rc4_md5_enc ENDP
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ lea r10,QWORD PTR[$L$body]
+ cmp rbx,r10
+ jb $L$in_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ lea r10,QWORD PTR[$L$epilogue]
+ cmp rbx,r10
+ jae $L$in_prologue
+
+ mov r15,QWORD PTR[40+rax]
+ mov r14,QWORD PTR[48+rax]
+ mov r13,QWORD PTR[56+rax]
+ mov r12,QWORD PTR[64+rax]
+ mov rbp,QWORD PTR[72+rax]
+ mov rbx,QWORD PTR[80+rax]
+ lea rax,QWORD PTR[88+rax]
+
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+$L$in_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_rc4_md5_enc
+ DD imagerel $L$SEH_end_rc4_md5_enc
+ DD imagerel $L$SEH_info_rc4_md5_enc
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_rc4_md5_enc::
+DB 9,0,0,0
+ DD imagerel se_handler
+
+.xdata ENDS
+END
diff --git a/win-x86_64/crypto/rc4/rc4-x86_64.asm b/win-x86_64/crypto/rc4/rc4-x86_64.asm
new file mode 100644
index 0000000..c183cac
--- /dev/null
+++ b/win-x86_64/crypto/rc4/rc4-x86_64.asm
@@ -0,0 +1,773 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+EXTERN OPENSSL_ia32cap_P:NEAR
+
+PUBLIC asm_RC4
+
+ALIGN 16
+asm_RC4 PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_asm_RC4::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+
+
+ or rsi,rsi
+ jne $L$entry
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$entry::
+ push rbx
+ push r12
+ push r13
+$L$prologue::
+ mov r11,rsi
+ mov r12,rdx
+ mov r13,rcx
+ xor r10,r10
+ xor rcx,rcx
+
+ lea rdi,QWORD PTR[8+rdi]
+ mov r10b,BYTE PTR[((-8))+rdi]
+ mov cl,BYTE PTR[((-4))+rdi]
+ cmp DWORD PTR[256+rdi],-1
+ je $L$RC4_CHAR
+ mov r8d,DWORD PTR[OPENSSL_ia32cap_P]
+ xor rbx,rbx
+ inc r10b
+ sub rbx,r10
+ sub r13,r12
+ mov eax,DWORD PTR[r10*4+rdi]
+ test r11,-16
+ jz $L$loop1
+ bt r8d,30
+ jc $L$intel
+ and rbx,7
+ lea rsi,QWORD PTR[1+r10]
+ jz $L$oop8
+ sub r11,rbx
+$L$oop8_warmup::
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ mov DWORD PTR[r10*4+rdi],edx
+ add al,dl
+ inc r10b
+ mov edx,DWORD PTR[rax*4+rdi]
+ mov eax,DWORD PTR[r10*4+rdi]
+ xor dl,BYTE PTR[r12]
+ mov BYTE PTR[r13*1+r12],dl
+ lea r12,QWORD PTR[1+r12]
+ dec rbx
+ jnz $L$oop8_warmup
+
+ lea rsi,QWORD PTR[1+r10]
+ jmp $L$oop8
+ALIGN 16
+$L$oop8::
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ mov ebx,DWORD PTR[rsi*4+rdi]
+ ror r8,8
+ mov DWORD PTR[r10*4+rdi],edx
+ add dl,al
+ mov r8b,BYTE PTR[rdx*4+rdi]
+ add cl,bl
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ mov eax,DWORD PTR[4+rsi*4+rdi]
+ ror r8,8
+ mov DWORD PTR[4+r10*4+rdi],edx
+ add dl,bl
+ mov r8b,BYTE PTR[rdx*4+rdi]
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ mov ebx,DWORD PTR[8+rsi*4+rdi]
+ ror r8,8
+ mov DWORD PTR[8+r10*4+rdi],edx
+ add dl,al
+ mov r8b,BYTE PTR[rdx*4+rdi]
+ add cl,bl
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ mov eax,DWORD PTR[12+rsi*4+rdi]
+ ror r8,8
+ mov DWORD PTR[12+r10*4+rdi],edx
+ add dl,bl
+ mov r8b,BYTE PTR[rdx*4+rdi]
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ mov ebx,DWORD PTR[16+rsi*4+rdi]
+ ror r8,8
+ mov DWORD PTR[16+r10*4+rdi],edx
+ add dl,al
+ mov r8b,BYTE PTR[rdx*4+rdi]
+ add cl,bl
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ mov eax,DWORD PTR[20+rsi*4+rdi]
+ ror r8,8
+ mov DWORD PTR[20+r10*4+rdi],edx
+ add dl,bl
+ mov r8b,BYTE PTR[rdx*4+rdi]
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ mov ebx,DWORD PTR[24+rsi*4+rdi]
+ ror r8,8
+ mov DWORD PTR[24+r10*4+rdi],edx
+ add dl,al
+ mov r8b,BYTE PTR[rdx*4+rdi]
+ add sil,8
+ add cl,bl
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ mov eax,DWORD PTR[((-4))+rsi*4+rdi]
+ ror r8,8
+ mov DWORD PTR[28+r10*4+rdi],edx
+ add dl,bl
+ mov r8b,BYTE PTR[rdx*4+rdi]
+ add r10b,8
+ ror r8,8
+ sub r11,8
+
+ xor r8,QWORD PTR[r12]
+ mov QWORD PTR[r13*1+r12],r8
+ lea r12,QWORD PTR[8+r12]
+
+ test r11,-8
+ jnz $L$oop8
+ cmp r11,0
+ jne $L$loop1
+ jmp $L$exit
+
+ALIGN 16
+$L$intel::
+ test r11,-32
+ jz $L$loop1
+ and rbx,15
+ jz $L$oop16_is_hot
+ sub r11,rbx
+$L$oop16_warmup::
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ mov DWORD PTR[r10*4+rdi],edx
+ add al,dl
+ inc r10b
+ mov edx,DWORD PTR[rax*4+rdi]
+ mov eax,DWORD PTR[r10*4+rdi]
+ xor dl,BYTE PTR[r12]
+ mov BYTE PTR[r13*1+r12],dl
+ lea r12,QWORD PTR[1+r12]
+ dec rbx
+ jnz $L$oop16_warmup
+
+ mov rbx,rcx
+ xor rcx,rcx
+ mov cl,bl
+
+$L$oop16_is_hot::
+ lea rsi,QWORD PTR[r10*4+rdi]
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ pxor xmm0,xmm0
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[4+rsi]
+ movzx eax,al
+ mov DWORD PTR[rsi],edx
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],0
+ jmp $L$oop16_enter
+ALIGN 16
+$L$oop16::
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ pxor xmm2,xmm0
+ psllq xmm1,8
+ pxor xmm0,xmm0
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[4+rsi]
+ movzx eax,al
+ mov DWORD PTR[rsi],edx
+ pxor xmm2,xmm1
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],0
+ movdqu XMMWORD PTR[r13*1+r12],xmm2
+ lea r12,QWORD PTR[16+r12]
+$L$oop16_enter::
+ mov edx,DWORD PTR[rcx*4+rdi]
+ pxor xmm1,xmm1
+ mov DWORD PTR[rcx*4+rdi],ebx
+ add bl,dl
+ mov eax,DWORD PTR[8+rsi]
+ movzx ebx,bl
+ mov DWORD PTR[4+rsi],edx
+ add cl,al
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],0
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[12+rsi]
+ movzx eax,al
+ mov DWORD PTR[8+rsi],edx
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],1
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ add bl,dl
+ mov eax,DWORD PTR[16+rsi]
+ movzx ebx,bl
+ mov DWORD PTR[12+rsi],edx
+ add cl,al
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],1
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[20+rsi]
+ movzx eax,al
+ mov DWORD PTR[16+rsi],edx
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],2
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ add bl,dl
+ mov eax,DWORD PTR[24+rsi]
+ movzx ebx,bl
+ mov DWORD PTR[20+rsi],edx
+ add cl,al
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],2
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[28+rsi]
+ movzx eax,al
+ mov DWORD PTR[24+rsi],edx
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],3
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ add bl,dl
+ mov eax,DWORD PTR[32+rsi]
+ movzx ebx,bl
+ mov DWORD PTR[28+rsi],edx
+ add cl,al
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],3
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[36+rsi]
+ movzx eax,al
+ mov DWORD PTR[32+rsi],edx
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],4
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ add bl,dl
+ mov eax,DWORD PTR[40+rsi]
+ movzx ebx,bl
+ mov DWORD PTR[36+rsi],edx
+ add cl,al
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],4
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[44+rsi]
+ movzx eax,al
+ mov DWORD PTR[40+rsi],edx
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],5
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ add bl,dl
+ mov eax,DWORD PTR[48+rsi]
+ movzx ebx,bl
+ mov DWORD PTR[44+rsi],edx
+ add cl,al
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],5
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[52+rsi]
+ movzx eax,al
+ mov DWORD PTR[48+rsi],edx
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],6
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ add bl,dl
+ mov eax,DWORD PTR[56+rsi]
+ movzx ebx,bl
+ mov DWORD PTR[52+rsi],edx
+ add cl,al
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],6
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[60+rsi]
+ movzx eax,al
+ mov DWORD PTR[56+rsi],edx
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],7
+ add r10b,16
+ movdqu xmm2,XMMWORD PTR[r12]
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ add bl,dl
+ movzx ebx,bl
+ mov DWORD PTR[60+rsi],edx
+ lea rsi,QWORD PTR[r10*4+rdi]
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],7
+ mov eax,DWORD PTR[rsi]
+ mov rbx,rcx
+ xor rcx,rcx
+ sub r11,16
+ mov cl,bl
+ test r11,-16
+ jnz $L$oop16
+
+ psllq xmm1,8
+ pxor xmm2,xmm0
+ pxor xmm2,xmm1
+ movdqu XMMWORD PTR[r13*1+r12],xmm2
+ lea r12,QWORD PTR[16+r12]
+
+ cmp r11,0
+ jne $L$loop1
+ jmp $L$exit
+
+ALIGN 16
+$L$loop1::
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ mov DWORD PTR[r10*4+rdi],edx
+ add al,dl
+ inc r10b
+ mov edx,DWORD PTR[rax*4+rdi]
+ mov eax,DWORD PTR[r10*4+rdi]
+ xor dl,BYTE PTR[r12]
+ mov BYTE PTR[r13*1+r12],dl
+ lea r12,QWORD PTR[1+r12]
+ dec r11
+ jnz $L$loop1
+ jmp $L$exit
+
+ALIGN 16
+$L$RC4_CHAR::
+ add r10b,1
+ movzx eax,BYTE PTR[r10*1+rdi]
+ test r11,-8
+ jz $L$cloop1
+ jmp $L$cloop8
+ALIGN 16
+$L$cloop8::
+ mov r8d,DWORD PTR[r12]
+ mov r9d,DWORD PTR[4+r12]
+ add cl,al
+ lea rsi,QWORD PTR[1+r10]
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ movzx esi,sil
+ movzx ebx,BYTE PTR[rsi*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],al
+ cmp rcx,rsi
+ mov BYTE PTR[r10*1+rdi],dl
+ jne $L$cmov0
+ mov rbx,rax
+$L$cmov0::
+ add dl,al
+ xor r8b,BYTE PTR[rdx*1+rdi]
+ ror r8d,8
+ add cl,bl
+ lea r10,QWORD PTR[1+rsi]
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ movzx r10d,r10b
+ movzx eax,BYTE PTR[r10*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],bl
+ cmp rcx,r10
+ mov BYTE PTR[rsi*1+rdi],dl
+ jne $L$cmov1
+ mov rax,rbx
+$L$cmov1::
+ add dl,bl
+ xor r8b,BYTE PTR[rdx*1+rdi]
+ ror r8d,8
+ add cl,al
+ lea rsi,QWORD PTR[1+r10]
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ movzx esi,sil
+ movzx ebx,BYTE PTR[rsi*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],al
+ cmp rcx,rsi
+ mov BYTE PTR[r10*1+rdi],dl
+ jne $L$cmov2
+ mov rbx,rax
+$L$cmov2::
+ add dl,al
+ xor r8b,BYTE PTR[rdx*1+rdi]
+ ror r8d,8
+ add cl,bl
+ lea r10,QWORD PTR[1+rsi]
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ movzx r10d,r10b
+ movzx eax,BYTE PTR[r10*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],bl
+ cmp rcx,r10
+ mov BYTE PTR[rsi*1+rdi],dl
+ jne $L$cmov3
+ mov rax,rbx
+$L$cmov3::
+ add dl,bl
+ xor r8b,BYTE PTR[rdx*1+rdi]
+ ror r8d,8
+ add cl,al
+ lea rsi,QWORD PTR[1+r10]
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ movzx esi,sil
+ movzx ebx,BYTE PTR[rsi*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],al
+ cmp rcx,rsi
+ mov BYTE PTR[r10*1+rdi],dl
+ jne $L$cmov4
+ mov rbx,rax
+$L$cmov4::
+ add dl,al
+ xor r9b,BYTE PTR[rdx*1+rdi]
+ ror r9d,8
+ add cl,bl
+ lea r10,QWORD PTR[1+rsi]
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ movzx r10d,r10b
+ movzx eax,BYTE PTR[r10*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],bl
+ cmp rcx,r10
+ mov BYTE PTR[rsi*1+rdi],dl
+ jne $L$cmov5
+ mov rax,rbx
+$L$cmov5::
+ add dl,bl
+ xor r9b,BYTE PTR[rdx*1+rdi]
+ ror r9d,8
+ add cl,al
+ lea rsi,QWORD PTR[1+r10]
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ movzx esi,sil
+ movzx ebx,BYTE PTR[rsi*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],al
+ cmp rcx,rsi
+ mov BYTE PTR[r10*1+rdi],dl
+ jne $L$cmov6
+ mov rbx,rax
+$L$cmov6::
+ add dl,al
+ xor r9b,BYTE PTR[rdx*1+rdi]
+ ror r9d,8
+ add cl,bl
+ lea r10,QWORD PTR[1+rsi]
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ movzx r10d,r10b
+ movzx eax,BYTE PTR[r10*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],bl
+ cmp rcx,r10
+ mov BYTE PTR[rsi*1+rdi],dl
+ jne $L$cmov7
+ mov rax,rbx
+$L$cmov7::
+ add dl,bl
+ xor r9b,BYTE PTR[rdx*1+rdi]
+ ror r9d,8
+ lea r11,QWORD PTR[((-8))+r11]
+ mov DWORD PTR[r13],r8d
+ lea r12,QWORD PTR[8+r12]
+ mov DWORD PTR[4+r13],r9d
+ lea r13,QWORD PTR[8+r13]
+
+ test r11,-8
+ jnz $L$cloop8
+ cmp r11,0
+ jne $L$cloop1
+ jmp $L$exit
+ALIGN 16
+$L$cloop1::
+ add cl,al
+ movzx ecx,cl
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],al
+ mov BYTE PTR[r10*1+rdi],dl
+ add dl,al
+ add r10b,1
+ movzx edx,dl
+ movzx r10d,r10b
+ movzx edx,BYTE PTR[rdx*1+rdi]
+ movzx eax,BYTE PTR[r10*1+rdi]
+ xor dl,BYTE PTR[r12]
+ lea r12,QWORD PTR[1+r12]
+ mov BYTE PTR[r13],dl
+ lea r13,QWORD PTR[1+r13]
+ sub r11,1
+ jnz $L$cloop1
+ jmp $L$exit
+
+ALIGN 16
+$L$exit::
+ sub r10b,1
+ mov DWORD PTR[((-8))+rdi],r10d
+ mov DWORD PTR[((-4))+rdi],ecx
+
+ mov r13,QWORD PTR[rsp]
+ mov r12,QWORD PTR[8+rsp]
+ mov rbx,QWORD PTR[16+rsp]
+ add rsp,24
+$L$epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_asm_RC4::
+asm_RC4 ENDP
+PUBLIC asm_RC4_set_key
+
+ALIGN 16
+asm_RC4_set_key PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_asm_RC4_set_key::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ lea rdi,QWORD PTR[8+rdi]
+ lea rdx,QWORD PTR[rsi*1+rdx]
+ neg rsi
+ mov rcx,rsi
+ xor eax,eax
+ xor r9,r9
+ xor r10,r10
+ xor r11,r11
+
+ mov r8d,DWORD PTR[OPENSSL_ia32cap_P]
+ bt r8d,20
+ jc $L$c1stloop
+ jmp $L$w1stloop
+
+ALIGN 16
+$L$w1stloop::
+ mov DWORD PTR[rax*4+rdi],eax
+ add al,1
+ jnc $L$w1stloop
+
+ xor r9,r9
+ xor r8,r8
+ALIGN 16
+$L$w2ndloop::
+ mov r10d,DWORD PTR[r9*4+rdi]
+ add r8b,BYTE PTR[rsi*1+rdx]
+ add r8b,r10b
+ add rsi,1
+ mov r11d,DWORD PTR[r8*4+rdi]
+ cmovz rsi,rcx
+ mov DWORD PTR[r8*4+rdi],r10d
+ mov DWORD PTR[r9*4+rdi],r11d
+ add r9b,1
+ jnc $L$w2ndloop
+ jmp $L$exit_key
+
+ALIGN 16
+$L$c1stloop::
+ mov BYTE PTR[rax*1+rdi],al
+ add al,1
+ jnc $L$c1stloop
+
+ xor r9,r9
+ xor r8,r8
+ALIGN 16
+$L$c2ndloop::
+ mov r10b,BYTE PTR[r9*1+rdi]
+ add r8b,BYTE PTR[rsi*1+rdx]
+ add r8b,r10b
+ add rsi,1
+ mov r11b,BYTE PTR[r8*1+rdi]
+ jnz $L$cnowrap
+ mov rsi,rcx
+$L$cnowrap::
+ mov BYTE PTR[r8*1+rdi],r10b
+ mov BYTE PTR[r9*1+rdi],r11b
+ add r9b,1
+ jnc $L$c2ndloop
+ mov DWORD PTR[256+rdi],-1
+
+ALIGN 16
+$L$exit_key::
+ xor eax,eax
+ mov DWORD PTR[((-8))+rdi],eax
+ mov DWORD PTR[((-4))+rdi],eax
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_asm_RC4_set_key::
+asm_RC4_set_key ENDP
+
+PUBLIC RC4_options
+
+ALIGN 16
+RC4_options PROC PUBLIC
+ lea rax,QWORD PTR[$L$opts]
+ mov rdx,QWORD PTR[OPENSSL_ia32cap_P]
+ mov edx,DWORD PTR[rdx]
+ bt edx,20
+ jc $L$8xchar
+ bt edx,30
+ jnc $L$done
+ add rax,25
+ DB 0F3h,0C3h ;repret
+$L$8xchar::
+ add rax,12
+$L$done::
+ DB 0F3h,0C3h ;repret
+ALIGN 64
+$L$opts::
+DB 114,99,52,40,56,120,44,105,110,116,41,0
+DB 114,99,52,40,56,120,44,99,104,97,114,41,0
+DB 114,99,52,40,49,54,120,44,105,110,116,41,0
+DB 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32
+DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+DB 62,0
+ALIGN 64
+RC4_options ENDP
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+stream_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ lea r10,QWORD PTR[$L$prologue]
+ cmp rbx,r10
+ jb $L$in_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ lea r10,QWORD PTR[$L$epilogue]
+ cmp rbx,r10
+ jae $L$in_prologue
+
+ lea rax,QWORD PTR[24+rax]
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov r12,QWORD PTR[((-16))+rax]
+ mov r13,QWORD PTR[((-24))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+
+$L$in_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ jmp $L$common_seh_exit
+stream_se_handler ENDP
+
+
+ALIGN 16
+key_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[152+r8]
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+$L$common_seh_exit::
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+key_se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_asm_RC4
+ DD imagerel $L$SEH_end_asm_RC4
+ DD imagerel $L$SEH_info_asm_RC4
+
+ DD imagerel $L$SEH_begin_asm_RC4_set_key
+ DD imagerel $L$SEH_end_asm_RC4_set_key
+ DD imagerel $L$SEH_info_asm_RC4_set_key
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_asm_RC4::
+DB 9,0,0,0
+ DD imagerel stream_se_handler
+$L$SEH_info_asm_RC4_set_key::
+DB 9,0,0,0
+ DD imagerel key_se_handler
+
+.xdata ENDS
+END
diff --git a/win-x86_64/crypto/sha/sha1-x86_64.asm b/win-x86_64/crypto/sha/sha1-x86_64.asm
new file mode 100644
index 0000000..ecda6dc
--- /dev/null
+++ b/win-x86_64/crypto/sha/sha1-x86_64.asm
@@ -0,0 +1,2619 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+EXTERN OPENSSL_ia32cap_P:NEAR
+
+PUBLIC sha1_block_data_order
+
+ALIGN 16
+sha1_block_data_order PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha1_block_data_order::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ mov r9d,DWORD PTR[((OPENSSL_ia32cap_P+0))]
+ mov r8d,DWORD PTR[((OPENSSL_ia32cap_P+4))]
+ mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+8))]
+ test r8d,512
+ jz $L$ialu
+ jmp _ssse3_shortcut
+
+ALIGN 16
+$L$ialu::
+ mov rax,rsp
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ mov r8,rdi
+ sub rsp,72
+ mov r9,rsi
+ and rsp,-64
+ mov r10,rdx
+ mov QWORD PTR[64+rsp],rax
+$L$prologue::
+
+ mov esi,DWORD PTR[r8]
+ mov edi,DWORD PTR[4+r8]
+ mov r11d,DWORD PTR[8+r8]
+ mov r12d,DWORD PTR[12+r8]
+ mov r13d,DWORD PTR[16+r8]
+ jmp $L$loop
+
+ALIGN 16
+$L$loop::
+ mov edx,DWORD PTR[r9]
+ bswap edx
+ mov ebp,DWORD PTR[4+r9]
+ mov eax,r12d
+ mov DWORD PTR[rsp],edx
+ mov ecx,esi
+ bswap ebp
+ xor eax,r11d
+ rol ecx,5
+ and eax,edi
+ lea r13d,DWORD PTR[1518500249+r13*1+rdx]
+ add r13d,ecx
+ xor eax,r12d
+ rol edi,30
+ add r13d,eax
+ mov r14d,DWORD PTR[8+r9]
+ mov eax,r11d
+ mov DWORD PTR[4+rsp],ebp
+ mov ecx,r13d
+ bswap r14d
+ xor eax,edi
+ rol ecx,5
+ and eax,esi
+ lea r12d,DWORD PTR[1518500249+r12*1+rbp]
+ add r12d,ecx
+ xor eax,r11d
+ rol esi,30
+ add r12d,eax
+ mov edx,DWORD PTR[12+r9]
+ mov eax,edi
+ mov DWORD PTR[8+rsp],r14d
+ mov ecx,r12d
+ bswap edx
+ xor eax,esi
+ rol ecx,5
+ and eax,r13d
+ lea r11d,DWORD PTR[1518500249+r11*1+r14]
+ add r11d,ecx
+ xor eax,edi
+ rol r13d,30
+ add r11d,eax
+ mov ebp,DWORD PTR[16+r9]
+ mov eax,esi
+ mov DWORD PTR[12+rsp],edx
+ mov ecx,r11d
+ bswap ebp
+ xor eax,r13d
+ rol ecx,5
+ and eax,r12d
+ lea edi,DWORD PTR[1518500249+rdi*1+rdx]
+ add edi,ecx
+ xor eax,esi
+ rol r12d,30
+ add edi,eax
+ mov r14d,DWORD PTR[20+r9]
+ mov eax,r13d
+ mov DWORD PTR[16+rsp],ebp
+ mov ecx,edi
+ bswap r14d
+ xor eax,r12d
+ rol ecx,5
+ and eax,r11d
+ lea esi,DWORD PTR[1518500249+rsi*1+rbp]
+ add esi,ecx
+ xor eax,r13d
+ rol r11d,30
+ add esi,eax
+ mov edx,DWORD PTR[24+r9]
+ mov eax,r12d
+ mov DWORD PTR[20+rsp],r14d
+ mov ecx,esi
+ bswap edx
+ xor eax,r11d
+ rol ecx,5
+ and eax,edi
+ lea r13d,DWORD PTR[1518500249+r13*1+r14]
+ add r13d,ecx
+ xor eax,r12d
+ rol edi,30
+ add r13d,eax
+ mov ebp,DWORD PTR[28+r9]
+ mov eax,r11d
+ mov DWORD PTR[24+rsp],edx
+ mov ecx,r13d
+ bswap ebp
+ xor eax,edi
+ rol ecx,5
+ and eax,esi
+ lea r12d,DWORD PTR[1518500249+r12*1+rdx]
+ add r12d,ecx
+ xor eax,r11d
+ rol esi,30
+ add r12d,eax
+ mov r14d,DWORD PTR[32+r9]
+ mov eax,edi
+ mov DWORD PTR[28+rsp],ebp
+ mov ecx,r12d
+ bswap r14d
+ xor eax,esi
+ rol ecx,5
+ and eax,r13d
+ lea r11d,DWORD PTR[1518500249+r11*1+rbp]
+ add r11d,ecx
+ xor eax,edi
+ rol r13d,30
+ add r11d,eax
+ mov edx,DWORD PTR[36+r9]
+ mov eax,esi
+ mov DWORD PTR[32+rsp],r14d
+ mov ecx,r11d
+ bswap edx
+ xor eax,r13d
+ rol ecx,5
+ and eax,r12d
+ lea edi,DWORD PTR[1518500249+rdi*1+r14]
+ add edi,ecx
+ xor eax,esi
+ rol r12d,30
+ add edi,eax
+ mov ebp,DWORD PTR[40+r9]
+ mov eax,r13d
+ mov DWORD PTR[36+rsp],edx
+ mov ecx,edi
+ bswap ebp
+ xor eax,r12d
+ rol ecx,5
+ and eax,r11d
+ lea esi,DWORD PTR[1518500249+rsi*1+rdx]
+ add esi,ecx
+ xor eax,r13d
+ rol r11d,30
+ add esi,eax
+ mov r14d,DWORD PTR[44+r9]
+ mov eax,r12d
+ mov DWORD PTR[40+rsp],ebp
+ mov ecx,esi
+ bswap r14d
+ xor eax,r11d
+ rol ecx,5
+ and eax,edi
+ lea r13d,DWORD PTR[1518500249+r13*1+rbp]
+ add r13d,ecx
+ xor eax,r12d
+ rol edi,30
+ add r13d,eax
+ mov edx,DWORD PTR[48+r9]
+ mov eax,r11d
+ mov DWORD PTR[44+rsp],r14d
+ mov ecx,r13d
+ bswap edx
+ xor eax,edi
+ rol ecx,5
+ and eax,esi
+ lea r12d,DWORD PTR[1518500249+r12*1+r14]
+ add r12d,ecx
+ xor eax,r11d
+ rol esi,30
+ add r12d,eax
+ mov ebp,DWORD PTR[52+r9]
+ mov eax,edi
+ mov DWORD PTR[48+rsp],edx
+ mov ecx,r12d
+ bswap ebp
+ xor eax,esi
+ rol ecx,5
+ and eax,r13d
+ lea r11d,DWORD PTR[1518500249+r11*1+rdx]
+ add r11d,ecx
+ xor eax,edi
+ rol r13d,30
+ add r11d,eax
+ mov r14d,DWORD PTR[56+r9]
+ mov eax,esi
+ mov DWORD PTR[52+rsp],ebp
+ mov ecx,r11d
+ bswap r14d
+ xor eax,r13d
+ rol ecx,5
+ and eax,r12d
+ lea edi,DWORD PTR[1518500249+rdi*1+rbp]
+ add edi,ecx
+ xor eax,esi
+ rol r12d,30
+ add edi,eax
+ mov edx,DWORD PTR[60+r9]
+ mov eax,r13d
+ mov DWORD PTR[56+rsp],r14d
+ mov ecx,edi
+ bswap edx
+ xor eax,r12d
+ rol ecx,5
+ and eax,r11d
+ lea esi,DWORD PTR[1518500249+rsi*1+r14]
+ add esi,ecx
+ xor eax,r13d
+ rol r11d,30
+ add esi,eax
+ xor ebp,DWORD PTR[rsp]
+ mov eax,r12d
+ mov DWORD PTR[60+rsp],edx
+ mov ecx,esi
+ xor ebp,DWORD PTR[8+rsp]
+ xor eax,r11d
+ rol ecx,5
+ xor ebp,DWORD PTR[32+rsp]
+ and eax,edi
+ lea r13d,DWORD PTR[1518500249+r13*1+rdx]
+ rol edi,30
+ xor eax,r12d
+ add r13d,ecx
+ rol ebp,1
+ add r13d,eax
+ xor r14d,DWORD PTR[4+rsp]
+ mov eax,r11d
+ mov DWORD PTR[rsp],ebp
+ mov ecx,r13d
+ xor r14d,DWORD PTR[12+rsp]
+ xor eax,edi
+ rol ecx,5
+ xor r14d,DWORD PTR[36+rsp]
+ and eax,esi
+ lea r12d,DWORD PTR[1518500249+r12*1+rbp]
+ rol esi,30
+ xor eax,r11d
+ add r12d,ecx
+ rol r14d,1
+ add r12d,eax
+ xor edx,DWORD PTR[8+rsp]
+ mov eax,edi
+ mov DWORD PTR[4+rsp],r14d
+ mov ecx,r12d
+ xor edx,DWORD PTR[16+rsp]
+ xor eax,esi
+ rol ecx,5
+ xor edx,DWORD PTR[40+rsp]
+ and eax,r13d
+ lea r11d,DWORD PTR[1518500249+r11*1+r14]
+ rol r13d,30
+ xor eax,edi
+ add r11d,ecx
+ rol edx,1
+ add r11d,eax
+ xor ebp,DWORD PTR[12+rsp]
+ mov eax,esi
+ mov DWORD PTR[8+rsp],edx
+ mov ecx,r11d
+ xor ebp,DWORD PTR[20+rsp]
+ xor eax,r13d
+ rol ecx,5
+ xor ebp,DWORD PTR[44+rsp]
+ and eax,r12d
+ lea edi,DWORD PTR[1518500249+rdi*1+rdx]
+ rol r12d,30
+ xor eax,esi
+ add edi,ecx
+ rol ebp,1
+ add edi,eax
+ xor r14d,DWORD PTR[16+rsp]
+ mov eax,r13d
+ mov DWORD PTR[12+rsp],ebp
+ mov ecx,edi
+ xor r14d,DWORD PTR[24+rsp]
+ xor eax,r12d
+ rol ecx,5
+ xor r14d,DWORD PTR[48+rsp]
+ and eax,r11d
+ lea esi,DWORD PTR[1518500249+rsi*1+rbp]
+ rol r11d,30
+ xor eax,r13d
+ add esi,ecx
+ rol r14d,1
+ add esi,eax
+ xor edx,DWORD PTR[20+rsp]
+ mov eax,edi
+ mov DWORD PTR[16+rsp],r14d
+ mov ecx,esi
+ xor edx,DWORD PTR[28+rsp]
+ xor eax,r12d
+ rol ecx,5
+ xor edx,DWORD PTR[52+rsp]
+ lea r13d,DWORD PTR[1859775393+r13*1+r14]
+ xor eax,r11d
+ add r13d,ecx
+ rol edi,30
+ add r13d,eax
+ rol edx,1
+ xor ebp,DWORD PTR[24+rsp]
+ mov eax,esi
+ mov DWORD PTR[20+rsp],edx
+ mov ecx,r13d
+ xor ebp,DWORD PTR[32+rsp]
+ xor eax,r11d
+ rol ecx,5
+ xor ebp,DWORD PTR[56+rsp]
+ lea r12d,DWORD PTR[1859775393+r12*1+rdx]
+ xor eax,edi
+ add r12d,ecx
+ rol esi,30
+ add r12d,eax
+ rol ebp,1
+ xor r14d,DWORD PTR[28+rsp]
+ mov eax,r13d
+ mov DWORD PTR[24+rsp],ebp
+ mov ecx,r12d
+ xor r14d,DWORD PTR[36+rsp]
+ xor eax,edi
+ rol ecx,5
+ xor r14d,DWORD PTR[60+rsp]
+ lea r11d,DWORD PTR[1859775393+r11*1+rbp]
+ xor eax,esi
+ add r11d,ecx
+ rol r13d,30
+ add r11d,eax
+ rol r14d,1
+ xor edx,DWORD PTR[32+rsp]
+ mov eax,r12d
+ mov DWORD PTR[28+rsp],r14d
+ mov ecx,r11d
+ xor edx,DWORD PTR[40+rsp]
+ xor eax,esi
+ rol ecx,5
+ xor edx,DWORD PTR[rsp]
+ lea edi,DWORD PTR[1859775393+rdi*1+r14]
+ xor eax,r13d
+ add edi,ecx
+ rol r12d,30
+ add edi,eax
+ rol edx,1
+ xor ebp,DWORD PTR[36+rsp]
+ mov eax,r11d
+ mov DWORD PTR[32+rsp],edx
+ mov ecx,edi
+ xor ebp,DWORD PTR[44+rsp]
+ xor eax,r13d
+ rol ecx,5
+ xor ebp,DWORD PTR[4+rsp]
+ lea esi,DWORD PTR[1859775393+rsi*1+rdx]
+ xor eax,r12d
+ add esi,ecx
+ rol r11d,30
+ add esi,eax
+ rol ebp,1
+ xor r14d,DWORD PTR[40+rsp]
+ mov eax,edi
+ mov DWORD PTR[36+rsp],ebp
+ mov ecx,esi
+ xor r14d,DWORD PTR[48+rsp]
+ xor eax,r12d
+ rol ecx,5
+ xor r14d,DWORD PTR[8+rsp]
+ lea r13d,DWORD PTR[1859775393+r13*1+rbp]
+ xor eax,r11d
+ add r13d,ecx
+ rol edi,30
+ add r13d,eax
+ rol r14d,1
+ xor edx,DWORD PTR[44+rsp]
+ mov eax,esi
+ mov DWORD PTR[40+rsp],r14d
+ mov ecx,r13d
+ xor edx,DWORD PTR[52+rsp]
+ xor eax,r11d
+ rol ecx,5
+ xor edx,DWORD PTR[12+rsp]
+ lea r12d,DWORD PTR[1859775393+r12*1+r14]
+ xor eax,edi
+ add r12d,ecx
+ rol esi,30
+ add r12d,eax
+ rol edx,1
+ xor ebp,DWORD PTR[48+rsp]
+ mov eax,r13d
+ mov DWORD PTR[44+rsp],edx
+ mov ecx,r12d
+ xor ebp,DWORD PTR[56+rsp]
+ xor eax,edi
+ rol ecx,5
+ xor ebp,DWORD PTR[16+rsp]
+ lea r11d,DWORD PTR[1859775393+r11*1+rdx]
+ xor eax,esi
+ add r11d,ecx
+ rol r13d,30
+ add r11d,eax
+ rol ebp,1
+ xor r14d,DWORD PTR[52+rsp]
+ mov eax,r12d
+ mov DWORD PTR[48+rsp],ebp
+ mov ecx,r11d
+ xor r14d,DWORD PTR[60+rsp]
+ xor eax,esi
+ rol ecx,5
+ xor r14d,DWORD PTR[20+rsp]
+ lea edi,DWORD PTR[1859775393+rdi*1+rbp]
+ xor eax,r13d
+ add edi,ecx
+ rol r12d,30
+ add edi,eax
+ rol r14d,1
+ xor edx,DWORD PTR[56+rsp]
+ mov eax,r11d
+ mov DWORD PTR[52+rsp],r14d
+ mov ecx,edi
+ xor edx,DWORD PTR[rsp]
+ xor eax,r13d
+ rol ecx,5
+ xor edx,DWORD PTR[24+rsp]
+ lea esi,DWORD PTR[1859775393+rsi*1+r14]
+ xor eax,r12d
+ add esi,ecx
+ rol r11d,30
+ add esi,eax
+ rol edx,1
+ xor ebp,DWORD PTR[60+rsp]
+ mov eax,edi
+ mov DWORD PTR[56+rsp],edx
+ mov ecx,esi
+ xor ebp,DWORD PTR[4+rsp]
+ xor eax,r12d
+ rol ecx,5
+ xor ebp,DWORD PTR[28+rsp]
+ lea r13d,DWORD PTR[1859775393+r13*1+rdx]
+ xor eax,r11d
+ add r13d,ecx
+ rol edi,30
+ add r13d,eax
+ rol ebp,1
+ xor r14d,DWORD PTR[rsp]
+ mov eax,esi
+ mov DWORD PTR[60+rsp],ebp
+ mov ecx,r13d
+ xor r14d,DWORD PTR[8+rsp]
+ xor eax,r11d
+ rol ecx,5
+ xor r14d,DWORD PTR[32+rsp]
+ lea r12d,DWORD PTR[1859775393+r12*1+rbp]
+ xor eax,edi
+ add r12d,ecx
+ rol esi,30
+ add r12d,eax
+ rol r14d,1
+ xor edx,DWORD PTR[4+rsp]
+ mov eax,r13d
+ mov DWORD PTR[rsp],r14d
+ mov ecx,r12d
+ xor edx,DWORD PTR[12+rsp]
+ xor eax,edi
+ rol ecx,5
+ xor edx,DWORD PTR[36+rsp]
+ lea r11d,DWORD PTR[1859775393+r11*1+r14]
+ xor eax,esi
+ add r11d,ecx
+ rol r13d,30
+ add r11d,eax
+ rol edx,1
+ xor ebp,DWORD PTR[8+rsp]
+ mov eax,r12d
+ mov DWORD PTR[4+rsp],edx
+ mov ecx,r11d
+ xor ebp,DWORD PTR[16+rsp]
+ xor eax,esi
+ rol ecx,5
+ xor ebp,DWORD PTR[40+rsp]
+ lea edi,DWORD PTR[1859775393+rdi*1+rdx]
+ xor eax,r13d
+ add edi,ecx
+ rol r12d,30
+ add edi,eax
+ rol ebp,1
+ xor r14d,DWORD PTR[12+rsp]
+ mov eax,r11d
+ mov DWORD PTR[8+rsp],ebp
+ mov ecx,edi
+ xor r14d,DWORD PTR[20+rsp]
+ xor eax,r13d
+ rol ecx,5
+ xor r14d,DWORD PTR[44+rsp]
+ lea esi,DWORD PTR[1859775393+rsi*1+rbp]
+ xor eax,r12d
+ add esi,ecx
+ rol r11d,30
+ add esi,eax
+ rol r14d,1
+ xor edx,DWORD PTR[16+rsp]
+ mov eax,edi
+ mov DWORD PTR[12+rsp],r14d
+ mov ecx,esi
+ xor edx,DWORD PTR[24+rsp]
+ xor eax,r12d
+ rol ecx,5
+ xor edx,DWORD PTR[48+rsp]
+ lea r13d,DWORD PTR[1859775393+r13*1+r14]
+ xor eax,r11d
+ add r13d,ecx
+ rol edi,30
+ add r13d,eax
+ rol edx,1
+ xor ebp,DWORD PTR[20+rsp]
+ mov eax,esi
+ mov DWORD PTR[16+rsp],edx
+ mov ecx,r13d
+ xor ebp,DWORD PTR[28+rsp]
+ xor eax,r11d
+ rol ecx,5
+ xor ebp,DWORD PTR[52+rsp]
+ lea r12d,DWORD PTR[1859775393+r12*1+rdx]
+ xor eax,edi
+ add r12d,ecx
+ rol esi,30
+ add r12d,eax
+ rol ebp,1
+ xor r14d,DWORD PTR[24+rsp]
+ mov eax,r13d
+ mov DWORD PTR[20+rsp],ebp
+ mov ecx,r12d
+ xor r14d,DWORD PTR[32+rsp]
+ xor eax,edi
+ rol ecx,5
+ xor r14d,DWORD PTR[56+rsp]
+ lea r11d,DWORD PTR[1859775393+r11*1+rbp]
+ xor eax,esi
+ add r11d,ecx
+ rol r13d,30
+ add r11d,eax
+ rol r14d,1
+ xor edx,DWORD PTR[28+rsp]
+ mov eax,r12d
+ mov DWORD PTR[24+rsp],r14d
+ mov ecx,r11d
+ xor edx,DWORD PTR[36+rsp]
+ xor eax,esi
+ rol ecx,5
+ xor edx,DWORD PTR[60+rsp]
+ lea edi,DWORD PTR[1859775393+rdi*1+r14]
+ xor eax,r13d
+ add edi,ecx
+ rol r12d,30
+ add edi,eax
+ rol edx,1
+ xor ebp,DWORD PTR[32+rsp]
+ mov eax,r11d
+ mov DWORD PTR[28+rsp],edx
+ mov ecx,edi
+ xor ebp,DWORD PTR[40+rsp]
+ xor eax,r13d
+ rol ecx,5
+ xor ebp,DWORD PTR[rsp]
+ lea esi,DWORD PTR[1859775393+rsi*1+rdx]
+ xor eax,r12d
+ add esi,ecx
+ rol r11d,30
+ add esi,eax
+ rol ebp,1
+ xor r14d,DWORD PTR[36+rsp]
+ mov eax,r12d
+ mov DWORD PTR[32+rsp],ebp
+ mov ebx,r12d
+ xor r14d,DWORD PTR[44+rsp]
+ and eax,r11d
+ mov ecx,esi
+ xor r14d,DWORD PTR[4+rsp]
+ lea r13d,DWORD PTR[((-1894007588))+r13*1+rbp]
+ xor ebx,r11d
+ rol ecx,5
+ add r13d,eax
+ rol r14d,1
+ and ebx,edi
+ add r13d,ecx
+ rol edi,30
+ add r13d,ebx
+ xor edx,DWORD PTR[40+rsp]
+ mov eax,r11d
+ mov DWORD PTR[36+rsp],r14d
+ mov ebx,r11d
+ xor edx,DWORD PTR[48+rsp]
+ and eax,edi
+ mov ecx,r13d
+ xor edx,DWORD PTR[8+rsp]
+ lea r12d,DWORD PTR[((-1894007588))+r12*1+r14]
+ xor ebx,edi
+ rol ecx,5
+ add r12d,eax
+ rol edx,1
+ and ebx,esi
+ add r12d,ecx
+ rol esi,30
+ add r12d,ebx
+ xor ebp,DWORD PTR[44+rsp]
+ mov eax,edi
+ mov DWORD PTR[40+rsp],edx
+ mov ebx,edi
+ xor ebp,DWORD PTR[52+rsp]
+ and eax,esi
+ mov ecx,r12d
+ xor ebp,DWORD PTR[12+rsp]
+ lea r11d,DWORD PTR[((-1894007588))+r11*1+rdx]
+ xor ebx,esi
+ rol ecx,5
+ add r11d,eax
+ rol ebp,1
+ and ebx,r13d
+ add r11d,ecx
+ rol r13d,30
+ add r11d,ebx
+ xor r14d,DWORD PTR[48+rsp]
+ mov eax,esi
+ mov DWORD PTR[44+rsp],ebp
+ mov ebx,esi
+ xor r14d,DWORD PTR[56+rsp]
+ and eax,r13d
+ mov ecx,r11d
+ xor r14d,DWORD PTR[16+rsp]
+ lea edi,DWORD PTR[((-1894007588))+rdi*1+rbp]
+ xor ebx,r13d
+ rol ecx,5
+ add edi,eax
+ rol r14d,1
+ and ebx,r12d
+ add edi,ecx
+ rol r12d,30
+ add edi,ebx
+ xor edx,DWORD PTR[52+rsp]
+ mov eax,r13d
+ mov DWORD PTR[48+rsp],r14d
+ mov ebx,r13d
+ xor edx,DWORD PTR[60+rsp]
+ and eax,r12d
+ mov ecx,edi
+ xor edx,DWORD PTR[20+rsp]
+ lea esi,DWORD PTR[((-1894007588))+rsi*1+r14]
+ xor ebx,r12d
+ rol ecx,5
+ add esi,eax
+ rol edx,1
+ and ebx,r11d
+ add esi,ecx
+ rol r11d,30
+ add esi,ebx
+ xor ebp,DWORD PTR[56+rsp]
+ mov eax,r12d
+ mov DWORD PTR[52+rsp],edx
+ mov ebx,r12d
+ xor ebp,DWORD PTR[rsp]
+ and eax,r11d
+ mov ecx,esi
+ xor ebp,DWORD PTR[24+rsp]
+ lea r13d,DWORD PTR[((-1894007588))+r13*1+rdx]
+ xor ebx,r11d
+ rol ecx,5
+ add r13d,eax
+ rol ebp,1
+ and ebx,edi
+ add r13d,ecx
+ rol edi,30
+ add r13d,ebx
+ xor r14d,DWORD PTR[60+rsp]
+ mov eax,r11d
+ mov DWORD PTR[56+rsp],ebp
+ mov ebx,r11d
+ xor r14d,DWORD PTR[4+rsp]
+ and eax,edi
+ mov ecx,r13d
+ xor r14d,DWORD PTR[28+rsp]
+ lea r12d,DWORD PTR[((-1894007588))+r12*1+rbp]
+ xor ebx,edi
+ rol ecx,5
+ add r12d,eax
+ rol r14d,1
+ and ebx,esi
+ add r12d,ecx
+ rol esi,30
+ add r12d,ebx
+ xor edx,DWORD PTR[rsp]
+ mov eax,edi
+ mov DWORD PTR[60+rsp],r14d
+ mov ebx,edi
+ xor edx,DWORD PTR[8+rsp]
+ and eax,esi
+ mov ecx,r12d
+ xor edx,DWORD PTR[32+rsp]
+ lea r11d,DWORD PTR[((-1894007588))+r11*1+r14]
+ xor ebx,esi
+ rol ecx,5
+ add r11d,eax
+ rol edx,1
+ and ebx,r13d
+ add r11d,ecx
+ rol r13d,30
+ add r11d,ebx
+ xor ebp,DWORD PTR[4+rsp]
+ mov eax,esi
+ mov DWORD PTR[rsp],edx
+ mov ebx,esi
+ xor ebp,DWORD PTR[12+rsp]
+ and eax,r13d
+ mov ecx,r11d
+ xor ebp,DWORD PTR[36+rsp]
+ lea edi,DWORD PTR[((-1894007588))+rdi*1+rdx]
+ xor ebx,r13d
+ rol ecx,5
+ add edi,eax
+ rol ebp,1
+ and ebx,r12d
+ add edi,ecx
+ rol r12d,30
+ add edi,ebx
+ xor r14d,DWORD PTR[8+rsp]
+ mov eax,r13d
+ mov DWORD PTR[4+rsp],ebp
+ mov ebx,r13d
+ xor r14d,DWORD PTR[16+rsp]
+ and eax,r12d
+ mov ecx,edi
+ xor r14d,DWORD PTR[40+rsp]
+ lea esi,DWORD PTR[((-1894007588))+rsi*1+rbp]
+ xor ebx,r12d
+ rol ecx,5
+ add esi,eax
+ rol r14d,1
+ and ebx,r11d
+ add esi,ecx
+ rol r11d,30
+ add esi,ebx
+ xor edx,DWORD PTR[12+rsp]
+ mov eax,r12d
+ mov DWORD PTR[8+rsp],r14d
+ mov ebx,r12d
+ xor edx,DWORD PTR[20+rsp]
+ and eax,r11d
+ mov ecx,esi
+ xor edx,DWORD PTR[44+rsp]
+ lea r13d,DWORD PTR[((-1894007588))+r13*1+r14]
+ xor ebx,r11d
+ rol ecx,5
+ add r13d,eax
+ rol edx,1
+ and ebx,edi
+ add r13d,ecx
+ rol edi,30
+ add r13d,ebx
+ xor ebp,DWORD PTR[16+rsp]
+ mov eax,r11d
+ mov DWORD PTR[12+rsp],edx
+ mov ebx,r11d
+ xor ebp,DWORD PTR[24+rsp]
+ and eax,edi
+ mov ecx,r13d
+ xor ebp,DWORD PTR[48+rsp]
+ lea r12d,DWORD PTR[((-1894007588))+r12*1+rdx]
+ xor ebx,edi
+ rol ecx,5
+ add r12d,eax
+ rol ebp,1
+ and ebx,esi
+ add r12d,ecx
+ rol esi,30
+ add r12d,ebx
+ xor r14d,DWORD PTR[20+rsp]
+ mov eax,edi
+ mov DWORD PTR[16+rsp],ebp
+ mov ebx,edi
+ xor r14d,DWORD PTR[28+rsp]
+ and eax,esi
+ mov ecx,r12d
+ xor r14d,DWORD PTR[52+rsp]
+ lea r11d,DWORD PTR[((-1894007588))+r11*1+rbp]
+ xor ebx,esi
+ rol ecx,5
+ add r11d,eax
+ rol r14d,1
+ and ebx,r13d
+ add r11d,ecx
+ rol r13d,30
+ add r11d,ebx
+ xor edx,DWORD PTR[24+rsp]
+ mov eax,esi
+ mov DWORD PTR[20+rsp],r14d
+ mov ebx,esi
+ xor edx,DWORD PTR[32+rsp]
+ and eax,r13d
+ mov ecx,r11d
+ xor edx,DWORD PTR[56+rsp]
+ lea edi,DWORD PTR[((-1894007588))+rdi*1+r14]
+ xor ebx,r13d
+ rol ecx,5
+ add edi,eax
+ rol edx,1
+ and ebx,r12d
+ add edi,ecx
+ rol r12d,30
+ add edi,ebx
+ xor ebp,DWORD PTR[28+rsp]
+ mov eax,r13d
+ mov DWORD PTR[24+rsp],edx
+ mov ebx,r13d
+ xor ebp,DWORD PTR[36+rsp]
+ and eax,r12d
+ mov ecx,edi
+ xor ebp,DWORD PTR[60+rsp]
+ lea esi,DWORD PTR[((-1894007588))+rsi*1+rdx]
+ xor ebx,r12d
+ rol ecx,5
+ add esi,eax
+ rol ebp,1
+ and ebx,r11d
+ add esi,ecx
+ rol r11d,30
+ add esi,ebx
+ xor r14d,DWORD PTR[32+rsp]
+ mov eax,r12d
+ mov DWORD PTR[28+rsp],ebp
+ mov ebx,r12d
+ xor r14d,DWORD PTR[40+rsp]
+ and eax,r11d
+ mov ecx,esi
+ xor r14d,DWORD PTR[rsp]
+ lea r13d,DWORD PTR[((-1894007588))+r13*1+rbp]
+ xor ebx,r11d
+ rol ecx,5
+ add r13d,eax
+ rol r14d,1
+ and ebx,edi
+ add r13d,ecx
+ rol edi,30
+ add r13d,ebx
+ xor edx,DWORD PTR[36+rsp]
+ mov eax,r11d
+ mov DWORD PTR[32+rsp],r14d
+ mov ebx,r11d
+ xor edx,DWORD PTR[44+rsp]
+ and eax,edi
+ mov ecx,r13d
+ xor edx,DWORD PTR[4+rsp]
+ lea r12d,DWORD PTR[((-1894007588))+r12*1+r14]
+ xor ebx,edi
+ rol ecx,5
+ add r12d,eax
+ rol edx,1
+ and ebx,esi
+ add r12d,ecx
+ rol esi,30
+ add r12d,ebx
+ xor ebp,DWORD PTR[40+rsp]
+ mov eax,edi
+ mov DWORD PTR[36+rsp],edx
+ mov ebx,edi
+ xor ebp,DWORD PTR[48+rsp]
+ and eax,esi
+ mov ecx,r12d
+ xor ebp,DWORD PTR[8+rsp]
+ lea r11d,DWORD PTR[((-1894007588))+r11*1+rdx]
+ xor ebx,esi
+ rol ecx,5
+ add r11d,eax
+ rol ebp,1
+ and ebx,r13d
+ add r11d,ecx
+ rol r13d,30
+ add r11d,ebx
+ xor r14d,DWORD PTR[44+rsp]
+ mov eax,esi
+ mov DWORD PTR[40+rsp],ebp
+ mov ebx,esi
+ xor r14d,DWORD PTR[52+rsp]
+ and eax,r13d
+ mov ecx,r11d
+ xor r14d,DWORD PTR[12+rsp]
+ lea edi,DWORD PTR[((-1894007588))+rdi*1+rbp]
+ xor ebx,r13d
+ rol ecx,5
+ add edi,eax
+ rol r14d,1
+ and ebx,r12d
+ add edi,ecx
+ rol r12d,30
+ add edi,ebx
+ xor edx,DWORD PTR[48+rsp]
+ mov eax,r13d
+ mov DWORD PTR[44+rsp],r14d
+ mov ebx,r13d
+ xor edx,DWORD PTR[56+rsp]
+ and eax,r12d
+ mov ecx,edi
+ xor edx,DWORD PTR[16+rsp]
+ lea esi,DWORD PTR[((-1894007588))+rsi*1+r14]
+ xor ebx,r12d
+ rol ecx,5
+ add esi,eax
+ rol edx,1
+ and ebx,r11d
+ add esi,ecx
+ rol r11d,30
+ add esi,ebx
+ xor ebp,DWORD PTR[52+rsp]
+ mov eax,edi
+ mov DWORD PTR[48+rsp],edx
+ mov ecx,esi
+ xor ebp,DWORD PTR[60+rsp]
+ xor eax,r12d
+ rol ecx,5
+ xor ebp,DWORD PTR[20+rsp]
+ lea r13d,DWORD PTR[((-899497514))+r13*1+rdx]
+ xor eax,r11d
+ add r13d,ecx
+ rol edi,30
+ add r13d,eax
+ rol ebp,1
+ xor r14d,DWORD PTR[56+rsp]
+ mov eax,esi
+ mov DWORD PTR[52+rsp],ebp
+ mov ecx,r13d
+ xor r14d,DWORD PTR[rsp]
+ xor eax,r11d
+ rol ecx,5
+ xor r14d,DWORD PTR[24+rsp]
+ lea r12d,DWORD PTR[((-899497514))+r12*1+rbp]
+ xor eax,edi
+ add r12d,ecx
+ rol esi,30
+ add r12d,eax
+ rol r14d,1
+ xor edx,DWORD PTR[60+rsp]
+ mov eax,r13d
+ mov DWORD PTR[56+rsp],r14d
+ mov ecx,r12d
+ xor edx,DWORD PTR[4+rsp]
+ xor eax,edi
+ rol ecx,5
+ xor edx,DWORD PTR[28+rsp]
+ lea r11d,DWORD PTR[((-899497514))+r11*1+r14]
+ xor eax,esi
+ add r11d,ecx
+ rol r13d,30
+ add r11d,eax
+ rol edx,1
+ xor ebp,DWORD PTR[rsp]
+ mov eax,r12d
+ mov DWORD PTR[60+rsp],edx
+ mov ecx,r11d
+ xor ebp,DWORD PTR[8+rsp]
+ xor eax,esi
+ rol ecx,5
+ xor ebp,DWORD PTR[32+rsp]
+ lea edi,DWORD PTR[((-899497514))+rdi*1+rdx]
+ xor eax,r13d
+ add edi,ecx
+ rol r12d,30
+ add edi,eax
+ rol ebp,1
+ xor r14d,DWORD PTR[4+rsp]
+ mov eax,r11d
+ mov DWORD PTR[rsp],ebp
+ mov ecx,edi
+ xor r14d,DWORD PTR[12+rsp]
+ xor eax,r13d
+ rol ecx,5
+ xor r14d,DWORD PTR[36+rsp]
+ lea esi,DWORD PTR[((-899497514))+rsi*1+rbp]
+ xor eax,r12d
+ add esi,ecx
+ rol r11d,30
+ add esi,eax
+ rol r14d,1
+ xor edx,DWORD PTR[8+rsp]
+ mov eax,edi
+ mov DWORD PTR[4+rsp],r14d
+ mov ecx,esi
+ xor edx,DWORD PTR[16+rsp]
+ xor eax,r12d
+ rol ecx,5
+ xor edx,DWORD PTR[40+rsp]
+ lea r13d,DWORD PTR[((-899497514))+r13*1+r14]
+ xor eax,r11d
+ add r13d,ecx
+ rol edi,30
+ add r13d,eax
+ rol edx,1
+ xor ebp,DWORD PTR[12+rsp]
+ mov eax,esi
+ mov DWORD PTR[8+rsp],edx
+ mov ecx,r13d
+ xor ebp,DWORD PTR[20+rsp]
+ xor eax,r11d
+ rol ecx,5
+ xor ebp,DWORD PTR[44+rsp]
+ lea r12d,DWORD PTR[((-899497514))+r12*1+rdx]
+ xor eax,edi
+ add r12d,ecx
+ rol esi,30
+ add r12d,eax
+ rol ebp,1
+ xor r14d,DWORD PTR[16+rsp]
+ mov eax,r13d
+ mov DWORD PTR[12+rsp],ebp
+ mov ecx,r12d
+ xor r14d,DWORD PTR[24+rsp]
+ xor eax,edi
+ rol ecx,5
+ xor r14d,DWORD PTR[48+rsp]
+ lea r11d,DWORD PTR[((-899497514))+r11*1+rbp]
+ xor eax,esi
+ add r11d,ecx
+ rol r13d,30
+ add r11d,eax
+ rol r14d,1
+ xor edx,DWORD PTR[20+rsp]
+ mov eax,r12d
+ mov DWORD PTR[16+rsp],r14d
+ mov ecx,r11d
+ xor edx,DWORD PTR[28+rsp]
+ xor eax,esi
+ rol ecx,5
+ xor edx,DWORD PTR[52+rsp]
+ lea edi,DWORD PTR[((-899497514))+rdi*1+r14]
+ xor eax,r13d
+ add edi,ecx
+ rol r12d,30
+ add edi,eax
+ rol edx,1
+ xor ebp,DWORD PTR[24+rsp]
+ mov eax,r11d
+ mov DWORD PTR[20+rsp],edx
+ mov ecx,edi
+ xor ebp,DWORD PTR[32+rsp]
+ xor eax,r13d
+ rol ecx,5
+ xor ebp,DWORD PTR[56+rsp]
+ lea esi,DWORD PTR[((-899497514))+rsi*1+rdx]
+ xor eax,r12d
+ add esi,ecx
+ rol r11d,30
+ add esi,eax
+ rol ebp,1
+ xor r14d,DWORD PTR[28+rsp]
+ mov eax,edi
+ mov DWORD PTR[24+rsp],ebp
+ mov ecx,esi
+ xor r14d,DWORD PTR[36+rsp]
+ xor eax,r12d
+ rol ecx,5
+ xor r14d,DWORD PTR[60+rsp]
+ lea r13d,DWORD PTR[((-899497514))+r13*1+rbp]
+ xor eax,r11d
+ add r13d,ecx
+ rol edi,30
+ add r13d,eax
+ rol r14d,1
+ xor edx,DWORD PTR[32+rsp]
+ mov eax,esi
+ mov DWORD PTR[28+rsp],r14d
+ mov ecx,r13d
+ xor edx,DWORD PTR[40+rsp]
+ xor eax,r11d
+ rol ecx,5
+ xor edx,DWORD PTR[rsp]
+ lea r12d,DWORD PTR[((-899497514))+r12*1+r14]
+ xor eax,edi
+ add r12d,ecx
+ rol esi,30
+ add r12d,eax
+ rol edx,1
+ xor ebp,DWORD PTR[36+rsp]
+ mov eax,r13d
+
+ mov ecx,r12d
+ xor ebp,DWORD PTR[44+rsp]
+ xor eax,edi
+ rol ecx,5
+ xor ebp,DWORD PTR[4+rsp]
+ lea r11d,DWORD PTR[((-899497514))+r11*1+rdx]
+ xor eax,esi
+ add r11d,ecx
+ rol r13d,30
+ add r11d,eax
+ rol ebp,1
+ xor r14d,DWORD PTR[40+rsp]
+ mov eax,r12d
+
+ mov ecx,r11d
+ xor r14d,DWORD PTR[48+rsp]
+ xor eax,esi
+ rol ecx,5
+ xor r14d,DWORD PTR[8+rsp]
+ lea edi,DWORD PTR[((-899497514))+rdi*1+rbp]
+ xor eax,r13d
+ add edi,ecx
+ rol r12d,30
+ add edi,eax
+ rol r14d,1
+ xor edx,DWORD PTR[44+rsp]
+ mov eax,r11d
+
+ mov ecx,edi
+ xor edx,DWORD PTR[52+rsp]
+ xor eax,r13d
+ rol ecx,5
+ xor edx,DWORD PTR[12+rsp]
+ lea esi,DWORD PTR[((-899497514))+rsi*1+r14]
+ xor eax,r12d
+ add esi,ecx
+ rol r11d,30
+ add esi,eax
+ rol edx,1
+ xor ebp,DWORD PTR[48+rsp]
+ mov eax,edi
+
+ mov ecx,esi
+ xor ebp,DWORD PTR[56+rsp]
+ xor eax,r12d
+ rol ecx,5
+ xor ebp,DWORD PTR[16+rsp]
+ lea r13d,DWORD PTR[((-899497514))+r13*1+rdx]
+ xor eax,r11d
+ add r13d,ecx
+ rol edi,30
+ add r13d,eax
+ rol ebp,1
+ xor r14d,DWORD PTR[52+rsp]
+ mov eax,esi
+
+ mov ecx,r13d
+ xor r14d,DWORD PTR[60+rsp]
+ xor eax,r11d
+ rol ecx,5
+ xor r14d,DWORD PTR[20+rsp]
+ lea r12d,DWORD PTR[((-899497514))+r12*1+rbp]
+ xor eax,edi
+ add r12d,ecx
+ rol esi,30
+ add r12d,eax
+ rol r14d,1
+ xor edx,DWORD PTR[56+rsp]
+ mov eax,r13d
+
+ mov ecx,r12d
+ xor edx,DWORD PTR[rsp]
+ xor eax,edi
+ rol ecx,5
+ xor edx,DWORD PTR[24+rsp]
+ lea r11d,DWORD PTR[((-899497514))+r11*1+r14]
+ xor eax,esi
+ add r11d,ecx
+ rol r13d,30
+ add r11d,eax
+ rol edx,1
+ xor ebp,DWORD PTR[60+rsp]
+ mov eax,r12d
+
+ mov ecx,r11d
+ xor ebp,DWORD PTR[4+rsp]
+ xor eax,esi
+ rol ecx,5
+ xor ebp,DWORD PTR[28+rsp]
+ lea edi,DWORD PTR[((-899497514))+rdi*1+rdx]
+ xor eax,r13d
+ add edi,ecx
+ rol r12d,30
+ add edi,eax
+ rol ebp,1
+ mov eax,r11d
+ mov ecx,edi
+ xor eax,r13d
+ lea esi,DWORD PTR[((-899497514))+rsi*1+rbp]
+ rol ecx,5
+ xor eax,r12d
+ add esi,ecx
+ rol r11d,30
+ add esi,eax
+ add esi,DWORD PTR[r8]
+ add edi,DWORD PTR[4+r8]
+ add r11d,DWORD PTR[8+r8]
+ add r12d,DWORD PTR[12+r8]
+ add r13d,DWORD PTR[16+r8]
+ mov DWORD PTR[r8],esi
+ mov DWORD PTR[4+r8],edi
+ mov DWORD PTR[8+r8],r11d
+ mov DWORD PTR[12+r8],r12d
+ mov DWORD PTR[16+r8],r13d
+
+ sub r10,1
+ lea r9,QWORD PTR[64+r9]
+ jnz $L$loop
+
+ mov rsi,QWORD PTR[64+rsp]
+ mov r14,QWORD PTR[((-40))+rsi]
+ mov r13,QWORD PTR[((-32))+rsi]
+ mov r12,QWORD PTR[((-24))+rsi]
+ mov rbp,QWORD PTR[((-16))+rsi]
+ mov rbx,QWORD PTR[((-8))+rsi]
+ lea rsp,QWORD PTR[rsi]
+$L$epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_sha1_block_data_order::
+sha1_block_data_order ENDP
+
+ALIGN 16
+sha1_block_data_order_ssse3 PROC PRIVATE
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha1_block_data_order_ssse3::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+_ssse3_shortcut::
+ mov rax,rsp
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ lea rsp,QWORD PTR[((-160))+rsp]
+ movaps XMMWORD PTR[(-40-96)+rax],xmm6
+ movaps XMMWORD PTR[(-40-80)+rax],xmm7
+ movaps XMMWORD PTR[(-40-64)+rax],xmm8
+ movaps XMMWORD PTR[(-40-48)+rax],xmm9
+ movaps XMMWORD PTR[(-40-32)+rax],xmm10
+ movaps XMMWORD PTR[(-40-16)+rax],xmm11
+$L$prologue_ssse3::
+ mov r14,rax
+ and rsp,-64
+ mov r8,rdi
+ mov r9,rsi
+ mov r10,rdx
+
+ shl r10,6
+ add r10,r9
+ lea r11,QWORD PTR[((K_XX_XX+64))]
+
+ mov eax,DWORD PTR[r8]
+ mov ebx,DWORD PTR[4+r8]
+ mov ecx,DWORD PTR[8+r8]
+ mov edx,DWORD PTR[12+r8]
+ mov esi,ebx
+ mov ebp,DWORD PTR[16+r8]
+ mov edi,ecx
+ xor edi,edx
+ and esi,edi
+
+ movdqa xmm6,XMMWORD PTR[64+r11]
+ movdqa xmm9,XMMWORD PTR[((-64))+r11]
+ movdqu xmm0,XMMWORD PTR[r9]
+ movdqu xmm1,XMMWORD PTR[16+r9]
+ movdqu xmm2,XMMWORD PTR[32+r9]
+ movdqu xmm3,XMMWORD PTR[48+r9]
+DB 102,15,56,0,198
+DB 102,15,56,0,206
+DB 102,15,56,0,214
+ add r9,64
+ paddd xmm0,xmm9
+DB 102,15,56,0,222
+ paddd xmm1,xmm9
+ paddd xmm2,xmm9
+ movdqa XMMWORD PTR[rsp],xmm0
+ psubd xmm0,xmm9
+ movdqa XMMWORD PTR[16+rsp],xmm1
+ psubd xmm1,xmm9
+ movdqa XMMWORD PTR[32+rsp],xmm2
+ psubd xmm2,xmm9
+ jmp $L$oop_ssse3
+ALIGN 16
+$L$oop_ssse3::
+ ror ebx,2
+ pshufd xmm4,xmm0,238
+ xor esi,edx
+ movdqa xmm8,xmm3
+ paddd xmm9,xmm3
+ mov edi,eax
+ add ebp,DWORD PTR[rsp]
+ punpcklqdq xmm4,xmm1
+ xor ebx,ecx
+ rol eax,5
+ add ebp,esi
+ psrldq xmm8,4
+ and edi,ebx
+ xor ebx,ecx
+ pxor xmm4,xmm0
+ add ebp,eax
+ ror eax,7
+ pxor xmm8,xmm2
+ xor edi,ecx
+ mov esi,ebp
+ add edx,DWORD PTR[4+rsp]
+ pxor xmm4,xmm8
+ xor eax,ebx
+ rol ebp,5
+ movdqa XMMWORD PTR[48+rsp],xmm9
+ add edx,edi
+ and esi,eax
+ movdqa xmm10,xmm4
+ xor eax,ebx
+ add edx,ebp
+ ror ebp,7
+ movdqa xmm8,xmm4
+ xor esi,ebx
+ pslldq xmm10,12
+ paddd xmm4,xmm4
+ mov edi,edx
+ add ecx,DWORD PTR[8+rsp]
+ psrld xmm8,31
+ xor ebp,eax
+ rol edx,5
+ add ecx,esi
+ movdqa xmm9,xmm10
+ and edi,ebp
+ xor ebp,eax
+ psrld xmm10,30
+ add ecx,edx
+ ror edx,7
+ por xmm4,xmm8
+ xor edi,eax
+ mov esi,ecx
+ add ebx,DWORD PTR[12+rsp]
+ pslld xmm9,2
+ pxor xmm4,xmm10
+ xor edx,ebp
+ movdqa xmm10,XMMWORD PTR[((-64))+r11]
+ rol ecx,5
+ add ebx,edi
+ and esi,edx
+ pxor xmm4,xmm9
+ xor edx,ebp
+ add ebx,ecx
+ ror ecx,7
+ pshufd xmm5,xmm1,238
+ xor esi,ebp
+ movdqa xmm9,xmm4
+ paddd xmm10,xmm4
+ mov edi,ebx
+ add eax,DWORD PTR[16+rsp]
+ punpcklqdq xmm5,xmm2
+ xor ecx,edx
+ rol ebx,5
+ add eax,esi
+ psrldq xmm9,4
+ and edi,ecx
+ xor ecx,edx
+ pxor xmm5,xmm1
+ add eax,ebx
+ ror ebx,7
+ pxor xmm9,xmm3
+ xor edi,edx
+ mov esi,eax
+ add ebp,DWORD PTR[20+rsp]
+ pxor xmm5,xmm9
+ xor ebx,ecx
+ rol eax,5
+ movdqa XMMWORD PTR[rsp],xmm10
+ add ebp,edi
+ and esi,ebx
+ movdqa xmm8,xmm5
+ xor ebx,ecx
+ add ebp,eax
+ ror eax,7
+ movdqa xmm9,xmm5
+ xor esi,ecx
+ pslldq xmm8,12
+ paddd xmm5,xmm5
+ mov edi,ebp
+ add edx,DWORD PTR[24+rsp]
+ psrld xmm9,31
+ xor eax,ebx
+ rol ebp,5
+ add edx,esi
+ movdqa xmm10,xmm8
+ and edi,eax
+ xor eax,ebx
+ psrld xmm8,30
+ add edx,ebp
+ ror ebp,7
+ por xmm5,xmm9
+ xor edi,ebx
+ mov esi,edx
+ add ecx,DWORD PTR[28+rsp]
+ pslld xmm10,2
+ pxor xmm5,xmm8
+ xor ebp,eax
+ movdqa xmm8,XMMWORD PTR[((-32))+r11]
+ rol edx,5
+ add ecx,edi
+ and esi,ebp
+ pxor xmm5,xmm10
+ xor ebp,eax
+ add ecx,edx
+ ror edx,7
+ pshufd xmm6,xmm2,238
+ xor esi,eax
+ movdqa xmm10,xmm5
+ paddd xmm8,xmm5
+ mov edi,ecx
+ add ebx,DWORD PTR[32+rsp]
+ punpcklqdq xmm6,xmm3
+ xor edx,ebp
+ rol ecx,5
+ add ebx,esi
+ psrldq xmm10,4
+ and edi,edx
+ xor edx,ebp
+ pxor xmm6,xmm2
+ add ebx,ecx
+ ror ecx,7
+ pxor xmm10,xmm4
+ xor edi,ebp
+ mov esi,ebx
+ add eax,DWORD PTR[36+rsp]
+ pxor xmm6,xmm10
+ xor ecx,edx
+ rol ebx,5
+ movdqa XMMWORD PTR[16+rsp],xmm8
+ add eax,edi
+ and esi,ecx
+ movdqa xmm9,xmm6
+ xor ecx,edx
+ add eax,ebx
+ ror ebx,7
+ movdqa xmm10,xmm6
+ xor esi,edx
+ pslldq xmm9,12
+ paddd xmm6,xmm6
+ mov edi,eax
+ add ebp,DWORD PTR[40+rsp]
+ psrld xmm10,31
+ xor ebx,ecx
+ rol eax,5
+ add ebp,esi
+ movdqa xmm8,xmm9
+ and edi,ebx
+ xor ebx,ecx
+ psrld xmm9,30
+ add ebp,eax
+ ror eax,7
+ por xmm6,xmm10
+ xor edi,ecx
+ mov esi,ebp
+ add edx,DWORD PTR[44+rsp]
+ pslld xmm8,2
+ pxor xmm6,xmm9
+ xor eax,ebx
+ movdqa xmm9,XMMWORD PTR[((-32))+r11]
+ rol ebp,5
+ add edx,edi
+ and esi,eax
+ pxor xmm6,xmm8
+ xor eax,ebx
+ add edx,ebp
+ ror ebp,7
+ pshufd xmm7,xmm3,238
+ xor esi,ebx
+ movdqa xmm8,xmm6
+ paddd xmm9,xmm6
+ mov edi,edx
+ add ecx,DWORD PTR[48+rsp]
+ punpcklqdq xmm7,xmm4
+ xor ebp,eax
+ rol edx,5
+ add ecx,esi
+ psrldq xmm8,4
+ and edi,ebp
+ xor ebp,eax
+ pxor xmm7,xmm3
+ add ecx,edx
+ ror edx,7
+ pxor xmm8,xmm5
+ xor edi,eax
+ mov esi,ecx
+ add ebx,DWORD PTR[52+rsp]
+ pxor xmm7,xmm8
+ xor edx,ebp
+ rol ecx,5
+ movdqa XMMWORD PTR[32+rsp],xmm9
+ add ebx,edi
+ and esi,edx
+ movdqa xmm10,xmm7
+ xor edx,ebp
+ add ebx,ecx
+ ror ecx,7
+ movdqa xmm8,xmm7
+ xor esi,ebp
+ pslldq xmm10,12
+ paddd xmm7,xmm7
+ mov edi,ebx
+ add eax,DWORD PTR[56+rsp]
+ psrld xmm8,31
+ xor ecx,edx
+ rol ebx,5
+ add eax,esi
+ movdqa xmm9,xmm10
+ and edi,ecx
+ xor ecx,edx
+ psrld xmm10,30
+ add eax,ebx
+ ror ebx,7
+ por xmm7,xmm8
+ xor edi,edx
+ mov esi,eax
+ add ebp,DWORD PTR[60+rsp]
+ pslld xmm9,2
+ pxor xmm7,xmm10
+ xor ebx,ecx
+ movdqa xmm10,XMMWORD PTR[((-32))+r11]
+ rol eax,5
+ add ebp,edi
+ and esi,ebx
+ pxor xmm7,xmm9
+ pshufd xmm9,xmm6,238
+ xor ebx,ecx
+ add ebp,eax
+ ror eax,7
+ pxor xmm0,xmm4
+ xor esi,ecx
+ mov edi,ebp
+ add edx,DWORD PTR[rsp]
+ punpcklqdq xmm9,xmm7
+ xor eax,ebx
+ rol ebp,5
+ pxor xmm0,xmm1
+ add edx,esi
+ and edi,eax
+ movdqa xmm8,xmm10
+ xor eax,ebx
+ paddd xmm10,xmm7
+ add edx,ebp
+ pxor xmm0,xmm9
+ ror ebp,7
+ xor edi,ebx
+ mov esi,edx
+ add ecx,DWORD PTR[4+rsp]
+ movdqa xmm9,xmm0
+ xor ebp,eax
+ rol edx,5
+ movdqa XMMWORD PTR[48+rsp],xmm10
+ add ecx,edi
+ and esi,ebp
+ xor ebp,eax
+ pslld xmm0,2
+ add ecx,edx
+ ror edx,7
+ psrld xmm9,30
+ xor esi,eax
+ mov edi,ecx
+ add ebx,DWORD PTR[8+rsp]
+ por xmm0,xmm9
+ xor edx,ebp
+ rol ecx,5
+ pshufd xmm10,xmm7,238
+ add ebx,esi
+ and edi,edx
+ xor edx,ebp
+ add ebx,ecx
+ add eax,DWORD PTR[12+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ add eax,edi
+ xor esi,edx
+ ror ecx,7
+ add eax,ebx
+ pxor xmm1,xmm5
+ add ebp,DWORD PTR[16+rsp]
+ xor esi,ecx
+ punpcklqdq xmm10,xmm0
+ mov edi,eax
+ rol eax,5
+ pxor xmm1,xmm2
+ add ebp,esi
+ xor edi,ecx
+ movdqa xmm9,xmm8
+ ror ebx,7
+ paddd xmm8,xmm0
+ add ebp,eax
+ pxor xmm1,xmm10
+ add edx,DWORD PTR[20+rsp]
+ xor edi,ebx
+ mov esi,ebp
+ rol ebp,5
+ movdqa xmm10,xmm1
+ add edx,edi
+ xor esi,ebx
+ movdqa XMMWORD PTR[rsp],xmm8
+ ror eax,7
+ add edx,ebp
+ add ecx,DWORD PTR[24+rsp]
+ pslld xmm1,2
+ xor esi,eax
+ mov edi,edx
+ psrld xmm10,30
+ rol edx,5
+ add ecx,esi
+ xor edi,eax
+ ror ebp,7
+ por xmm1,xmm10
+ add ecx,edx
+ add ebx,DWORD PTR[28+rsp]
+ pshufd xmm8,xmm0,238
+ xor edi,ebp
+ mov esi,ecx
+ rol ecx,5
+ add ebx,edi
+ xor esi,ebp
+ ror edx,7
+ add ebx,ecx
+ pxor xmm2,xmm6
+ add eax,DWORD PTR[32+rsp]
+ xor esi,edx
+ punpcklqdq xmm8,xmm1
+ mov edi,ebx
+ rol ebx,5
+ pxor xmm2,xmm3
+ add eax,esi
+ xor edi,edx
+ movdqa xmm10,XMMWORD PTR[r11]
+ ror ecx,7
+ paddd xmm9,xmm1
+ add eax,ebx
+ pxor xmm2,xmm8
+ add ebp,DWORD PTR[36+rsp]
+ xor edi,ecx
+ mov esi,eax
+ rol eax,5
+ movdqa xmm8,xmm2
+ add ebp,edi
+ xor esi,ecx
+ movdqa XMMWORD PTR[16+rsp],xmm9
+ ror ebx,7
+ add ebp,eax
+ add edx,DWORD PTR[40+rsp]
+ pslld xmm2,2
+ xor esi,ebx
+ mov edi,ebp
+ psrld xmm8,30
+ rol ebp,5
+ add edx,esi
+ xor edi,ebx
+ ror eax,7
+ por xmm2,xmm8
+ add edx,ebp
+ add ecx,DWORD PTR[44+rsp]
+ pshufd xmm9,xmm1,238
+ xor edi,eax
+ mov esi,edx
+ rol edx,5
+ add ecx,edi
+ xor esi,eax
+ ror ebp,7
+ add ecx,edx
+ pxor xmm3,xmm7
+ add ebx,DWORD PTR[48+rsp]
+ xor esi,ebp
+ punpcklqdq xmm9,xmm2
+ mov edi,ecx
+ rol ecx,5
+ pxor xmm3,xmm4
+ add ebx,esi
+ xor edi,ebp
+ movdqa xmm8,xmm10
+ ror edx,7
+ paddd xmm10,xmm2
+ add ebx,ecx
+ pxor xmm3,xmm9
+ add eax,DWORD PTR[52+rsp]
+ xor edi,edx
+ mov esi,ebx
+ rol ebx,5
+ movdqa xmm9,xmm3
+ add eax,edi
+ xor esi,edx
+ movdqa XMMWORD PTR[32+rsp],xmm10
+ ror ecx,7
+ add eax,ebx
+ add ebp,DWORD PTR[56+rsp]
+ pslld xmm3,2
+ xor esi,ecx
+ mov edi,eax
+ psrld xmm9,30
+ rol eax,5
+ add ebp,esi
+ xor edi,ecx
+ ror ebx,7
+ por xmm3,xmm9
+ add ebp,eax
+ add edx,DWORD PTR[60+rsp]
+ pshufd xmm10,xmm2,238
+ xor edi,ebx
+ mov esi,ebp
+ rol ebp,5
+ add edx,edi
+ xor esi,ebx
+ ror eax,7
+ add edx,ebp
+ pxor xmm4,xmm0
+ add ecx,DWORD PTR[rsp]
+ xor esi,eax
+ punpcklqdq xmm10,xmm3
+ mov edi,edx
+ rol edx,5
+ pxor xmm4,xmm5
+ add ecx,esi
+ xor edi,eax
+ movdqa xmm9,xmm8
+ ror ebp,7
+ paddd xmm8,xmm3
+ add ecx,edx
+ pxor xmm4,xmm10
+ add ebx,DWORD PTR[4+rsp]
+ xor edi,ebp
+ mov esi,ecx
+ rol ecx,5
+ movdqa xmm10,xmm4
+ add ebx,edi
+ xor esi,ebp
+ movdqa XMMWORD PTR[48+rsp],xmm8
+ ror edx,7
+ add ebx,ecx
+ add eax,DWORD PTR[8+rsp]
+ pslld xmm4,2
+ xor esi,edx
+ mov edi,ebx
+ psrld xmm10,30
+ rol ebx,5
+ add eax,esi
+ xor edi,edx
+ ror ecx,7
+ por xmm4,xmm10
+ add eax,ebx
+ add ebp,DWORD PTR[12+rsp]
+ pshufd xmm8,xmm3,238
+ xor edi,ecx
+ mov esi,eax
+ rol eax,5
+ add ebp,edi
+ xor esi,ecx
+ ror ebx,7
+ add ebp,eax
+ pxor xmm5,xmm1
+ add edx,DWORD PTR[16+rsp]
+ xor esi,ebx
+ punpcklqdq xmm8,xmm4
+ mov edi,ebp
+ rol ebp,5
+ pxor xmm5,xmm6
+ add edx,esi
+ xor edi,ebx
+ movdqa xmm10,xmm9
+ ror eax,7
+ paddd xmm9,xmm4
+ add edx,ebp
+ pxor xmm5,xmm8
+ add ecx,DWORD PTR[20+rsp]
+ xor edi,eax
+ mov esi,edx
+ rol edx,5
+ movdqa xmm8,xmm5
+ add ecx,edi
+ xor esi,eax
+ movdqa XMMWORD PTR[rsp],xmm9
+ ror ebp,7
+ add ecx,edx
+ add ebx,DWORD PTR[24+rsp]
+ pslld xmm5,2
+ xor esi,ebp
+ mov edi,ecx
+ psrld xmm8,30
+ rol ecx,5
+ add ebx,esi
+ xor edi,ebp
+ ror edx,7
+ por xmm5,xmm8
+ add ebx,ecx
+ add eax,DWORD PTR[28+rsp]
+ pshufd xmm9,xmm4,238
+ ror ecx,7
+ mov esi,ebx
+ xor edi,edx
+ rol ebx,5
+ add eax,edi
+ xor esi,ecx
+ xor ecx,edx
+ add eax,ebx
+ pxor xmm6,xmm2
+ add ebp,DWORD PTR[32+rsp]
+ and esi,ecx
+ xor ecx,edx
+ ror ebx,7
+ punpcklqdq xmm9,xmm5
+ mov edi,eax
+ xor esi,ecx
+ pxor xmm6,xmm7
+ rol eax,5
+ add ebp,esi
+ movdqa xmm8,xmm10
+ xor edi,ebx
+ paddd xmm10,xmm5
+ xor ebx,ecx
+ pxor xmm6,xmm9
+ add ebp,eax
+ add edx,DWORD PTR[36+rsp]
+ and edi,ebx
+ xor ebx,ecx
+ ror eax,7
+ movdqa xmm9,xmm6
+ mov esi,ebp
+ xor edi,ebx
+ movdqa XMMWORD PTR[16+rsp],xmm10
+ rol ebp,5
+ add edx,edi
+ xor esi,eax
+ pslld xmm6,2
+ xor eax,ebx
+ add edx,ebp
+ psrld xmm9,30
+ add ecx,DWORD PTR[40+rsp]
+ and esi,eax
+ xor eax,ebx
+ por xmm6,xmm9
+ ror ebp,7
+ mov edi,edx
+ xor esi,eax
+ rol edx,5
+ pshufd xmm10,xmm5,238
+ add ecx,esi
+ xor edi,ebp
+ xor ebp,eax
+ add ecx,edx
+ add ebx,DWORD PTR[44+rsp]
+ and edi,ebp
+ xor ebp,eax
+ ror edx,7
+ mov esi,ecx
+ xor edi,ebp
+ rol ecx,5
+ add ebx,edi
+ xor esi,edx
+ xor edx,ebp
+ add ebx,ecx
+ pxor xmm7,xmm3
+ add eax,DWORD PTR[48+rsp]
+ and esi,edx
+ xor edx,ebp
+ ror ecx,7
+ punpcklqdq xmm10,xmm6
+ mov edi,ebx
+ xor esi,edx
+ pxor xmm7,xmm0
+ rol ebx,5
+ add eax,esi
+ movdqa xmm9,XMMWORD PTR[32+r11]
+ xor edi,ecx
+ paddd xmm8,xmm6
+ xor ecx,edx
+ pxor xmm7,xmm10
+ add eax,ebx
+ add ebp,DWORD PTR[52+rsp]
+ and edi,ecx
+ xor ecx,edx
+ ror ebx,7
+ movdqa xmm10,xmm7
+ mov esi,eax
+ xor edi,ecx
+ movdqa XMMWORD PTR[32+rsp],xmm8
+ rol eax,5
+ add ebp,edi
+ xor esi,ebx
+ pslld xmm7,2
+ xor ebx,ecx
+ add ebp,eax
+ psrld xmm10,30
+ add edx,DWORD PTR[56+rsp]
+ and esi,ebx
+ xor ebx,ecx
+ por xmm7,xmm10
+ ror eax,7
+ mov edi,ebp
+ xor esi,ebx
+ rol ebp,5
+ pshufd xmm8,xmm6,238
+ add edx,esi
+ xor edi,eax
+ xor eax,ebx
+ add edx,ebp
+ add ecx,DWORD PTR[60+rsp]
+ and edi,eax
+ xor eax,ebx
+ ror ebp,7
+ mov esi,edx
+ xor edi,eax
+ rol edx,5
+ add ecx,edi
+ xor esi,ebp
+ xor ebp,eax
+ add ecx,edx
+ pxor xmm0,xmm4
+ add ebx,DWORD PTR[rsp]
+ and esi,ebp
+ xor ebp,eax
+ ror edx,7
+ punpcklqdq xmm8,xmm7
+ mov edi,ecx
+ xor esi,ebp
+ pxor xmm0,xmm1
+ rol ecx,5
+ add ebx,esi
+ movdqa xmm10,xmm9
+ xor edi,edx
+ paddd xmm9,xmm7
+ xor edx,ebp
+ pxor xmm0,xmm8
+ add ebx,ecx
+ add eax,DWORD PTR[4+rsp]
+ and edi,edx
+ xor edx,ebp
+ ror ecx,7
+ movdqa xmm8,xmm0
+ mov esi,ebx
+ xor edi,edx
+ movdqa XMMWORD PTR[48+rsp],xmm9
+ rol ebx,5
+ add eax,edi
+ xor esi,ecx
+ pslld xmm0,2
+ xor ecx,edx
+ add eax,ebx
+ psrld xmm8,30
+ add ebp,DWORD PTR[8+rsp]
+ and esi,ecx
+ xor ecx,edx
+ por xmm0,xmm8
+ ror ebx,7
+ mov edi,eax
+ xor esi,ecx
+ rol eax,5
+ pshufd xmm9,xmm7,238
+ add ebp,esi
+ xor edi,ebx
+ xor ebx,ecx
+ add ebp,eax
+ add edx,DWORD PTR[12+rsp]
+ and edi,ebx
+ xor ebx,ecx
+ ror eax,7
+ mov esi,ebp
+ xor edi,ebx
+ rol ebp,5
+ add edx,edi
+ xor esi,eax
+ xor eax,ebx
+ add edx,ebp
+ pxor xmm1,xmm5
+ add ecx,DWORD PTR[16+rsp]
+ and esi,eax
+ xor eax,ebx
+ ror ebp,7
+ punpcklqdq xmm9,xmm0
+ mov edi,edx
+ xor esi,eax
+ pxor xmm1,xmm2
+ rol edx,5
+ add ecx,esi
+ movdqa xmm8,xmm10
+ xor edi,ebp
+ paddd xmm10,xmm0
+ xor ebp,eax
+ pxor xmm1,xmm9
+ add ecx,edx
+ add ebx,DWORD PTR[20+rsp]
+ and edi,ebp
+ xor ebp,eax
+ ror edx,7
+ movdqa xmm9,xmm1
+ mov esi,ecx
+ xor edi,ebp
+ movdqa XMMWORD PTR[rsp],xmm10
+ rol ecx,5
+ add ebx,edi
+ xor esi,edx
+ pslld xmm1,2
+ xor edx,ebp
+ add ebx,ecx
+ psrld xmm9,30
+ add eax,DWORD PTR[24+rsp]
+ and esi,edx
+ xor edx,ebp
+ por xmm1,xmm9
+ ror ecx,7
+ mov edi,ebx
+ xor esi,edx
+ rol ebx,5
+ pshufd xmm10,xmm0,238
+ add eax,esi
+ xor edi,ecx
+ xor ecx,edx
+ add eax,ebx
+ add ebp,DWORD PTR[28+rsp]
+ and edi,ecx
+ xor ecx,edx
+ ror ebx,7
+ mov esi,eax
+ xor edi,ecx
+ rol eax,5
+ add ebp,edi
+ xor esi,ebx
+ xor ebx,ecx
+ add ebp,eax
+ pxor xmm2,xmm6
+ add edx,DWORD PTR[32+rsp]
+ and esi,ebx
+ xor ebx,ecx
+ ror eax,7
+ punpcklqdq xmm10,xmm1
+ mov edi,ebp
+ xor esi,ebx
+ pxor xmm2,xmm3
+ rol ebp,5
+ add edx,esi
+ movdqa xmm9,xmm8
+ xor edi,eax
+ paddd xmm8,xmm1
+ xor eax,ebx
+ pxor xmm2,xmm10
+ add edx,ebp
+ add ecx,DWORD PTR[36+rsp]
+ and edi,eax
+ xor eax,ebx
+ ror ebp,7
+ movdqa xmm10,xmm2
+ mov esi,edx
+ xor edi,eax
+ movdqa XMMWORD PTR[16+rsp],xmm8
+ rol edx,5
+ add ecx,edi
+ xor esi,ebp
+ pslld xmm2,2
+ xor ebp,eax
+ add ecx,edx
+ psrld xmm10,30
+ add ebx,DWORD PTR[40+rsp]
+ and esi,ebp
+ xor ebp,eax
+ por xmm2,xmm10
+ ror edx,7
+ mov edi,ecx
+ xor esi,ebp
+ rol ecx,5
+ pshufd xmm8,xmm1,238
+ add ebx,esi
+ xor edi,edx
+ xor edx,ebp
+ add ebx,ecx
+ add eax,DWORD PTR[44+rsp]
+ and edi,edx
+ xor edx,ebp
+ ror ecx,7
+ mov esi,ebx
+ xor edi,edx
+ rol ebx,5
+ add eax,edi
+ xor esi,edx
+ add eax,ebx
+ pxor xmm3,xmm7
+ add ebp,DWORD PTR[48+rsp]
+ xor esi,ecx
+ punpcklqdq xmm8,xmm2
+ mov edi,eax
+ rol eax,5
+ pxor xmm3,xmm4
+ add ebp,esi
+ xor edi,ecx
+ movdqa xmm10,xmm9
+ ror ebx,7
+ paddd xmm9,xmm2
+ add ebp,eax
+ pxor xmm3,xmm8
+ add edx,DWORD PTR[52+rsp]
+ xor edi,ebx
+ mov esi,ebp
+ rol ebp,5
+ movdqa xmm8,xmm3
+ add edx,edi
+ xor esi,ebx
+ movdqa XMMWORD PTR[32+rsp],xmm9
+ ror eax,7
+ add edx,ebp
+ add ecx,DWORD PTR[56+rsp]
+ pslld xmm3,2
+ xor esi,eax
+ mov edi,edx
+ psrld xmm8,30
+ rol edx,5
+ add ecx,esi
+ xor edi,eax
+ ror ebp,7
+ por xmm3,xmm8
+ add ecx,edx
+ add ebx,DWORD PTR[60+rsp]
+ xor edi,ebp
+ mov esi,ecx
+ rol ecx,5
+ add ebx,edi
+ xor esi,ebp
+ ror edx,7
+ add ebx,ecx
+ add eax,DWORD PTR[rsp]
+ xor esi,edx
+ mov edi,ebx
+ rol ebx,5
+ paddd xmm10,xmm3
+ add eax,esi
+ xor edi,edx
+ movdqa XMMWORD PTR[48+rsp],xmm10
+ ror ecx,7
+ add eax,ebx
+ add ebp,DWORD PTR[4+rsp]
+ xor edi,ecx
+ mov esi,eax
+ rol eax,5
+ add ebp,edi
+ xor esi,ecx
+ ror ebx,7
+ add ebp,eax
+ add edx,DWORD PTR[8+rsp]
+ xor esi,ebx
+ mov edi,ebp
+ rol ebp,5
+ add edx,esi
+ xor edi,ebx
+ ror eax,7
+ add edx,ebp
+ add ecx,DWORD PTR[12+rsp]
+ xor edi,eax
+ mov esi,edx
+ rol edx,5
+ add ecx,edi
+ xor esi,eax
+ ror ebp,7
+ add ecx,edx
+ cmp r9,r10
+ je $L$done_ssse3
+ movdqa xmm6,XMMWORD PTR[64+r11]
+ movdqa xmm9,XMMWORD PTR[((-64))+r11]
+ movdqu xmm0,XMMWORD PTR[r9]
+ movdqu xmm1,XMMWORD PTR[16+r9]
+ movdqu xmm2,XMMWORD PTR[32+r9]
+ movdqu xmm3,XMMWORD PTR[48+r9]
+DB 102,15,56,0,198
+ add r9,64
+ add ebx,DWORD PTR[16+rsp]
+ xor esi,ebp
+ mov edi,ecx
+DB 102,15,56,0,206
+ rol ecx,5
+ add ebx,esi
+ xor edi,ebp
+ ror edx,7
+ paddd xmm0,xmm9
+ add ebx,ecx
+ add eax,DWORD PTR[20+rsp]
+ xor edi,edx
+ mov esi,ebx
+ movdqa XMMWORD PTR[rsp],xmm0
+ rol ebx,5
+ add eax,edi
+ xor esi,edx
+ ror ecx,7
+ psubd xmm0,xmm9
+ add eax,ebx
+ add ebp,DWORD PTR[24+rsp]
+ xor esi,ecx
+ mov edi,eax
+ rol eax,5
+ add ebp,esi
+ xor edi,ecx
+ ror ebx,7
+ add ebp,eax
+ add edx,DWORD PTR[28+rsp]
+ xor edi,ebx
+ mov esi,ebp
+ rol ebp,5
+ add edx,edi
+ xor esi,ebx
+ ror eax,7
+ add edx,ebp
+ add ecx,DWORD PTR[32+rsp]
+ xor esi,eax
+ mov edi,edx
+DB 102,15,56,0,214
+ rol edx,5
+ add ecx,esi
+ xor edi,eax
+ ror ebp,7
+ paddd xmm1,xmm9
+ add ecx,edx
+ add ebx,DWORD PTR[36+rsp]
+ xor edi,ebp
+ mov esi,ecx
+ movdqa XMMWORD PTR[16+rsp],xmm1
+ rol ecx,5
+ add ebx,edi
+ xor esi,ebp
+ ror edx,7
+ psubd xmm1,xmm9
+ add ebx,ecx
+ add eax,DWORD PTR[40+rsp]
+ xor esi,edx
+ mov edi,ebx
+ rol ebx,5
+ add eax,esi
+ xor edi,edx
+ ror ecx,7
+ add eax,ebx
+ add ebp,DWORD PTR[44+rsp]
+ xor edi,ecx
+ mov esi,eax
+ rol eax,5
+ add ebp,edi
+ xor esi,ecx
+ ror ebx,7
+ add ebp,eax
+ add edx,DWORD PTR[48+rsp]
+ xor esi,ebx
+ mov edi,ebp
+DB 102,15,56,0,222
+ rol ebp,5
+ add edx,esi
+ xor edi,ebx
+ ror eax,7
+ paddd xmm2,xmm9
+ add edx,ebp
+ add ecx,DWORD PTR[52+rsp]
+ xor edi,eax
+ mov esi,edx
+ movdqa XMMWORD PTR[32+rsp],xmm2
+ rol edx,5
+ add ecx,edi
+ xor esi,eax
+ ror ebp,7
+ psubd xmm2,xmm9
+ add ecx,edx
+ add ebx,DWORD PTR[56+rsp]
+ xor esi,ebp
+ mov edi,ecx
+ rol ecx,5
+ add ebx,esi
+ xor edi,ebp
+ ror edx,7
+ add ebx,ecx
+ add eax,DWORD PTR[60+rsp]
+ xor edi,edx
+ mov esi,ebx
+ rol ebx,5
+ add eax,edi
+ ror ecx,7
+ add eax,ebx
+ add eax,DWORD PTR[r8]
+ add esi,DWORD PTR[4+r8]
+ add ecx,DWORD PTR[8+r8]
+ add edx,DWORD PTR[12+r8]
+ mov DWORD PTR[r8],eax
+ add ebp,DWORD PTR[16+r8]
+ mov DWORD PTR[4+r8],esi
+ mov ebx,esi
+ mov DWORD PTR[8+r8],ecx
+ mov edi,ecx
+ mov DWORD PTR[12+r8],edx
+ xor edi,edx
+ mov DWORD PTR[16+r8],ebp
+ and esi,edi
+ jmp $L$oop_ssse3
+
+ALIGN 16
+$L$done_ssse3::
+ add ebx,DWORD PTR[16+rsp]
+ xor esi,ebp
+ mov edi,ecx
+ rol ecx,5
+ add ebx,esi
+ xor edi,ebp
+ ror edx,7
+ add ebx,ecx
+ add eax,DWORD PTR[20+rsp]
+ xor edi,edx
+ mov esi,ebx
+ rol ebx,5
+ add eax,edi
+ xor esi,edx
+ ror ecx,7
+ add eax,ebx
+ add ebp,DWORD PTR[24+rsp]
+ xor esi,ecx
+ mov edi,eax
+ rol eax,5
+ add ebp,esi
+ xor edi,ecx
+ ror ebx,7
+ add ebp,eax
+ add edx,DWORD PTR[28+rsp]
+ xor edi,ebx
+ mov esi,ebp
+ rol ebp,5
+ add edx,edi
+ xor esi,ebx
+ ror eax,7
+ add edx,ebp
+ add ecx,DWORD PTR[32+rsp]
+ xor esi,eax
+ mov edi,edx
+ rol edx,5
+ add ecx,esi
+ xor edi,eax
+ ror ebp,7
+ add ecx,edx
+ add ebx,DWORD PTR[36+rsp]
+ xor edi,ebp
+ mov esi,ecx
+ rol ecx,5
+ add ebx,edi
+ xor esi,ebp
+ ror edx,7
+ add ebx,ecx
+ add eax,DWORD PTR[40+rsp]
+ xor esi,edx
+ mov edi,ebx
+ rol ebx,5
+ add eax,esi
+ xor edi,edx
+ ror ecx,7
+ add eax,ebx
+ add ebp,DWORD PTR[44+rsp]
+ xor edi,ecx
+ mov esi,eax
+ rol eax,5
+ add ebp,edi
+ xor esi,ecx
+ ror ebx,7
+ add ebp,eax
+ add edx,DWORD PTR[48+rsp]
+ xor esi,ebx
+ mov edi,ebp
+ rol ebp,5
+ add edx,esi
+ xor edi,ebx
+ ror eax,7
+ add edx,ebp
+ add ecx,DWORD PTR[52+rsp]
+ xor edi,eax
+ mov esi,edx
+ rol edx,5
+ add ecx,edi
+ xor esi,eax
+ ror ebp,7
+ add ecx,edx
+ add ebx,DWORD PTR[56+rsp]
+ xor esi,ebp
+ mov edi,ecx
+ rol ecx,5
+ add ebx,esi
+ xor edi,ebp
+ ror edx,7
+ add ebx,ecx
+ add eax,DWORD PTR[60+rsp]
+ xor edi,edx
+ mov esi,ebx
+ rol ebx,5
+ add eax,edi
+ ror ecx,7
+ add eax,ebx
+ add eax,DWORD PTR[r8]
+ add esi,DWORD PTR[4+r8]
+ add ecx,DWORD PTR[8+r8]
+ mov DWORD PTR[r8],eax
+ add edx,DWORD PTR[12+r8]
+ mov DWORD PTR[4+r8],esi
+ add ebp,DWORD PTR[16+r8]
+ mov DWORD PTR[8+r8],ecx
+ mov DWORD PTR[12+r8],edx
+ mov DWORD PTR[16+r8],ebp
+ movaps xmm6,XMMWORD PTR[((-40-96))+r14]
+ movaps xmm7,XMMWORD PTR[((-40-80))+r14]
+ movaps xmm8,XMMWORD PTR[((-40-64))+r14]
+ movaps xmm9,XMMWORD PTR[((-40-48))+r14]
+ movaps xmm10,XMMWORD PTR[((-40-32))+r14]
+ movaps xmm11,XMMWORD PTR[((-40-16))+r14]
+ lea rsi,QWORD PTR[r14]
+ mov r14,QWORD PTR[((-40))+rsi]
+ mov r13,QWORD PTR[((-32))+rsi]
+ mov r12,QWORD PTR[((-24))+rsi]
+ mov rbp,QWORD PTR[((-16))+rsi]
+ mov rbx,QWORD PTR[((-8))+rsi]
+ lea rsp,QWORD PTR[rsi]
+$L$epilogue_ssse3::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_sha1_block_data_order_ssse3::
+sha1_block_data_order_ssse3 ENDP
+ALIGN 64
+K_XX_XX::
+ DD 05a827999h,05a827999h,05a827999h,05a827999h
+ DD 05a827999h,05a827999h,05a827999h,05a827999h
+ DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h
+ DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h
+ DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch
+ DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch
+ DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h
+ DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h
+ DD 000010203h,004050607h,008090a0bh,00c0d0e0fh
+ DD 000010203h,004050607h,008090a0bh,00c0d0e0fh
+DB 0fh,0eh,0dh,0ch,0bh,0ah,09h,08h,07h,06h,05h,04h,03h,02h,01h,00h
+DB 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
+DB 102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44
+DB 32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60
+DB 97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114
+DB 103,62,0
+ALIGN 64
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ lea r10,QWORD PTR[$L$prologue]
+ cmp rbx,r10
+ jb $L$common_seh_tail
+
+ mov rax,QWORD PTR[152+r8]
+
+ lea r10,QWORD PTR[$L$epilogue]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+
+ mov rax,QWORD PTR[64+rax]
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+
+ jmp $L$common_seh_tail
+se_handler ENDP
+
+ALIGN 16
+ssse3_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_seh_tail
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+
+ mov rax,QWORD PTR[232+r8]
+
+ lea rsi,QWORD PTR[((-40-96))+rax]
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,12
+ DD 0a548f3fch
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+
+$L$common_seh_tail::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+ssse3_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_sha1_block_data_order
+ DD imagerel $L$SEH_end_sha1_block_data_order
+ DD imagerel $L$SEH_info_sha1_block_data_order
+ DD imagerel $L$SEH_begin_sha1_block_data_order_ssse3
+ DD imagerel $L$SEH_end_sha1_block_data_order_ssse3
+ DD imagerel $L$SEH_info_sha1_block_data_order_ssse3
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_sha1_block_data_order::
+DB 9,0,0,0
+ DD imagerel se_handler
+$L$SEH_info_sha1_block_data_order_ssse3::
+DB 9,0,0,0
+ DD imagerel ssse3_handler
+ DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3
+
+.xdata ENDS
+END
diff --git a/win-x86_64/crypto/sha/sha256-x86_64.asm b/win-x86_64/crypto/sha/sha256-x86_64.asm
new file mode 100644
index 0000000..41f2edd
--- /dev/null
+++ b/win-x86_64/crypto/sha/sha256-x86_64.asm
@@ -0,0 +1,2997 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+
+EXTERN OPENSSL_ia32cap_P:NEAR
+PUBLIC sha256_block_data_order
+
+ALIGN 16
+sha256_block_data_order PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha256_block_data_order::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ lea r11,QWORD PTR[OPENSSL_ia32cap_P]
+ mov r9d,DWORD PTR[r11]
+ mov r10d,DWORD PTR[4+r11]
+ mov r11d,DWORD PTR[8+r11]
+ test r10d,512
+ jnz $L$ssse3_shortcut
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ mov r11,rsp
+ shl rdx,4
+ sub rsp,16*4+4*8
+ lea rdx,QWORD PTR[rdx*4+rsi]
+ and rsp,-64
+ mov QWORD PTR[((64+0))+rsp],rdi
+ mov QWORD PTR[((64+8))+rsp],rsi
+ mov QWORD PTR[((64+16))+rsp],rdx
+ mov QWORD PTR[((64+24))+rsp],r11
+$L$prologue::
+
+ mov eax,DWORD PTR[rdi]
+ mov ebx,DWORD PTR[4+rdi]
+ mov ecx,DWORD PTR[8+rdi]
+ mov edx,DWORD PTR[12+rdi]
+ mov r8d,DWORD PTR[16+rdi]
+ mov r9d,DWORD PTR[20+rdi]
+ mov r10d,DWORD PTR[24+rdi]
+ mov r11d,DWORD PTR[28+rdi]
+ jmp $L$loop
+
+ALIGN 16
+$L$loop::
+ mov edi,ebx
+ lea rbp,QWORD PTR[K256]
+ xor edi,ecx
+ mov r12d,DWORD PTR[rsi]
+ mov r13d,r8d
+ mov r14d,eax
+ bswap r12d
+ ror r13d,14
+ mov r15d,r9d
+
+ xor r13d,r8d
+ ror r14d,9
+ xor r15d,r10d
+
+ mov DWORD PTR[rsp],r12d
+ xor r14d,eax
+ and r15d,r8d
+
+ ror r13d,5
+ add r12d,r11d
+ xor r15d,r10d
+
+ ror r14d,11
+ xor r13d,r8d
+ add r12d,r15d
+
+ mov r15d,eax
+ add r12d,DWORD PTR[rbp]
+ xor r14d,eax
+
+ xor r15d,ebx
+ ror r13d,6
+ mov r11d,ebx
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor r11d,edi
+ add edx,r12d
+ add r11d,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ add r11d,r14d
+ mov r12d,DWORD PTR[4+rsi]
+ mov r13d,edx
+ mov r14d,r11d
+ bswap r12d
+ ror r13d,14
+ mov edi,r8d
+
+ xor r13d,edx
+ ror r14d,9
+ xor edi,r9d
+
+ mov DWORD PTR[4+rsp],r12d
+ xor r14d,r11d
+ and edi,edx
+
+ ror r13d,5
+ add r12d,r10d
+ xor edi,r9d
+
+ ror r14d,11
+ xor r13d,edx
+ add r12d,edi
+
+ mov edi,r11d
+ add r12d,DWORD PTR[rbp]
+ xor r14d,r11d
+
+ xor edi,eax
+ ror r13d,6
+ mov r10d,eax
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor r10d,r15d
+ add ecx,r12d
+ add r10d,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ add r10d,r14d
+ mov r12d,DWORD PTR[8+rsi]
+ mov r13d,ecx
+ mov r14d,r10d
+ bswap r12d
+ ror r13d,14
+ mov r15d,edx
+
+ xor r13d,ecx
+ ror r14d,9
+ xor r15d,r8d
+
+ mov DWORD PTR[8+rsp],r12d
+ xor r14d,r10d
+ and r15d,ecx
+
+ ror r13d,5
+ add r12d,r9d
+ xor r15d,r8d
+
+ ror r14d,11
+ xor r13d,ecx
+ add r12d,r15d
+
+ mov r15d,r10d
+ add r12d,DWORD PTR[rbp]
+ xor r14d,r10d
+
+ xor r15d,r11d
+ ror r13d,6
+ mov r9d,r11d
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor r9d,edi
+ add ebx,r12d
+ add r9d,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ add r9d,r14d
+ mov r12d,DWORD PTR[12+rsi]
+ mov r13d,ebx
+ mov r14d,r9d
+ bswap r12d
+ ror r13d,14
+ mov edi,ecx
+
+ xor r13d,ebx
+ ror r14d,9
+ xor edi,edx
+
+ mov DWORD PTR[12+rsp],r12d
+ xor r14d,r9d
+ and edi,ebx
+
+ ror r13d,5
+ add r12d,r8d
+ xor edi,edx
+
+ ror r14d,11
+ xor r13d,ebx
+ add r12d,edi
+
+ mov edi,r9d
+ add r12d,DWORD PTR[rbp]
+ xor r14d,r9d
+
+ xor edi,r10d
+ ror r13d,6
+ mov r8d,r10d
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor r8d,r15d
+ add eax,r12d
+ add r8d,r12d
+
+ lea rbp,QWORD PTR[20+rbp]
+ add r8d,r14d
+ mov r12d,DWORD PTR[16+rsi]
+ mov r13d,eax
+ mov r14d,r8d
+ bswap r12d
+ ror r13d,14
+ mov r15d,ebx
+
+ xor r13d,eax
+ ror r14d,9
+ xor r15d,ecx
+
+ mov DWORD PTR[16+rsp],r12d
+ xor r14d,r8d
+ and r15d,eax
+
+ ror r13d,5
+ add r12d,edx
+ xor r15d,ecx
+
+ ror r14d,11
+ xor r13d,eax
+ add r12d,r15d
+
+ mov r15d,r8d
+ add r12d,DWORD PTR[rbp]
+ xor r14d,r8d
+
+ xor r15d,r9d
+ ror r13d,6
+ mov edx,r9d
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor edx,edi
+ add r11d,r12d
+ add edx,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ add edx,r14d
+ mov r12d,DWORD PTR[20+rsi]
+ mov r13d,r11d
+ mov r14d,edx
+ bswap r12d
+ ror r13d,14
+ mov edi,eax
+
+ xor r13d,r11d
+ ror r14d,9
+ xor edi,ebx
+
+ mov DWORD PTR[20+rsp],r12d
+ xor r14d,edx
+ and edi,r11d
+
+ ror r13d,5
+ add r12d,ecx
+ xor edi,ebx
+
+ ror r14d,11
+ xor r13d,r11d
+ add r12d,edi
+
+ mov edi,edx
+ add r12d,DWORD PTR[rbp]
+ xor r14d,edx
+
+ xor edi,r8d
+ ror r13d,6
+ mov ecx,r8d
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor ecx,r15d
+ add r10d,r12d
+ add ecx,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ add ecx,r14d
+ mov r12d,DWORD PTR[24+rsi]
+ mov r13d,r10d
+ mov r14d,ecx
+ bswap r12d
+ ror r13d,14
+ mov r15d,r11d
+
+ xor r13d,r10d
+ ror r14d,9
+ xor r15d,eax
+
+ mov DWORD PTR[24+rsp],r12d
+ xor r14d,ecx
+ and r15d,r10d
+
+ ror r13d,5
+ add r12d,ebx
+ xor r15d,eax
+
+ ror r14d,11
+ xor r13d,r10d
+ add r12d,r15d
+
+ mov r15d,ecx
+ add r12d,DWORD PTR[rbp]
+ xor r14d,ecx
+
+ xor r15d,edx
+ ror r13d,6
+ mov ebx,edx
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor ebx,edi
+ add r9d,r12d
+ add ebx,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ add ebx,r14d
+ mov r12d,DWORD PTR[28+rsi]
+ mov r13d,r9d
+ mov r14d,ebx
+ bswap r12d
+ ror r13d,14
+ mov edi,r10d
+
+ xor r13d,r9d
+ ror r14d,9
+ xor edi,r11d
+
+ mov DWORD PTR[28+rsp],r12d
+ xor r14d,ebx
+ and edi,r9d
+
+ ror r13d,5
+ add r12d,eax
+ xor edi,r11d
+
+ ror r14d,11
+ xor r13d,r9d
+ add r12d,edi
+
+ mov edi,ebx
+ add r12d,DWORD PTR[rbp]
+ xor r14d,ebx
+
+ xor edi,ecx
+ ror r13d,6
+ mov eax,ecx
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor eax,r15d
+ add r8d,r12d
+ add eax,r12d
+
+ lea rbp,QWORD PTR[20+rbp]
+ add eax,r14d
+ mov r12d,DWORD PTR[32+rsi]
+ mov r13d,r8d
+ mov r14d,eax
+ bswap r12d
+ ror r13d,14
+ mov r15d,r9d
+
+ xor r13d,r8d
+ ror r14d,9
+ xor r15d,r10d
+
+ mov DWORD PTR[32+rsp],r12d
+ xor r14d,eax
+ and r15d,r8d
+
+ ror r13d,5
+ add r12d,r11d
+ xor r15d,r10d
+
+ ror r14d,11
+ xor r13d,r8d
+ add r12d,r15d
+
+ mov r15d,eax
+ add r12d,DWORD PTR[rbp]
+ xor r14d,eax
+
+ xor r15d,ebx
+ ror r13d,6
+ mov r11d,ebx
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor r11d,edi
+ add edx,r12d
+ add r11d,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ add r11d,r14d
+ mov r12d,DWORD PTR[36+rsi]
+ mov r13d,edx
+ mov r14d,r11d
+ bswap r12d
+ ror r13d,14
+ mov edi,r8d
+
+ xor r13d,edx
+ ror r14d,9
+ xor edi,r9d
+
+ mov DWORD PTR[36+rsp],r12d
+ xor r14d,r11d
+ and edi,edx
+
+ ror r13d,5
+ add r12d,r10d
+ xor edi,r9d
+
+ ror r14d,11
+ xor r13d,edx
+ add r12d,edi
+
+ mov edi,r11d
+ add r12d,DWORD PTR[rbp]
+ xor r14d,r11d
+
+ xor edi,eax
+ ror r13d,6
+ mov r10d,eax
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor r10d,r15d
+ add ecx,r12d
+ add r10d,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ add r10d,r14d
+ mov r12d,DWORD PTR[40+rsi]
+ mov r13d,ecx
+ mov r14d,r10d
+ bswap r12d
+ ror r13d,14
+ mov r15d,edx
+
+ xor r13d,ecx
+ ror r14d,9
+ xor r15d,r8d
+
+ mov DWORD PTR[40+rsp],r12d
+ xor r14d,r10d
+ and r15d,ecx
+
+ ror r13d,5
+ add r12d,r9d
+ xor r15d,r8d
+
+ ror r14d,11
+ xor r13d,ecx
+ add r12d,r15d
+
+ mov r15d,r10d
+ add r12d,DWORD PTR[rbp]
+ xor r14d,r10d
+
+ xor r15d,r11d
+ ror r13d,6
+ mov r9d,r11d
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor r9d,edi
+ add ebx,r12d
+ add r9d,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ add r9d,r14d
+ mov r12d,DWORD PTR[44+rsi]
+ mov r13d,ebx
+ mov r14d,r9d
+ bswap r12d
+ ror r13d,14
+ mov edi,ecx
+
+ xor r13d,ebx
+ ror r14d,9
+ xor edi,edx
+
+ mov DWORD PTR[44+rsp],r12d
+ xor r14d,r9d
+ and edi,ebx
+
+ ror r13d,5
+ add r12d,r8d
+ xor edi,edx
+
+ ror r14d,11
+ xor r13d,ebx
+ add r12d,edi
+
+ mov edi,r9d
+ add r12d,DWORD PTR[rbp]
+ xor r14d,r9d
+
+ xor edi,r10d
+ ror r13d,6
+ mov r8d,r10d
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor r8d,r15d
+ add eax,r12d
+ add r8d,r12d
+
+ lea rbp,QWORD PTR[20+rbp]
+ add r8d,r14d
+ mov r12d,DWORD PTR[48+rsi]
+ mov r13d,eax
+ mov r14d,r8d
+ bswap r12d
+ ror r13d,14
+ mov r15d,ebx
+
+ xor r13d,eax
+ ror r14d,9
+ xor r15d,ecx
+
+ mov DWORD PTR[48+rsp],r12d
+ xor r14d,r8d
+ and r15d,eax
+
+ ror r13d,5
+ add r12d,edx
+ xor r15d,ecx
+
+ ror r14d,11
+ xor r13d,eax
+ add r12d,r15d
+
+ mov r15d,r8d
+ add r12d,DWORD PTR[rbp]
+ xor r14d,r8d
+
+ xor r15d,r9d
+ ror r13d,6
+ mov edx,r9d
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor edx,edi
+ add r11d,r12d
+ add edx,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ add edx,r14d
+ mov r12d,DWORD PTR[52+rsi]
+ mov r13d,r11d
+ mov r14d,edx
+ bswap r12d
+ ror r13d,14
+ mov edi,eax
+
+ xor r13d,r11d
+ ror r14d,9
+ xor edi,ebx
+
+ mov DWORD PTR[52+rsp],r12d
+ xor r14d,edx
+ and edi,r11d
+
+ ror r13d,5
+ add r12d,ecx
+ xor edi,ebx
+
+ ror r14d,11
+ xor r13d,r11d
+ add r12d,edi
+
+ mov edi,edx
+ add r12d,DWORD PTR[rbp]
+ xor r14d,edx
+
+ xor edi,r8d
+ ror r13d,6
+ mov ecx,r8d
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor ecx,r15d
+ add r10d,r12d
+ add ecx,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ add ecx,r14d
+ mov r12d,DWORD PTR[56+rsi]
+ mov r13d,r10d
+ mov r14d,ecx
+ bswap r12d
+ ror r13d,14
+ mov r15d,r11d
+
+ xor r13d,r10d
+ ror r14d,9
+ xor r15d,eax
+
+ mov DWORD PTR[56+rsp],r12d
+ xor r14d,ecx
+ and r15d,r10d
+
+ ror r13d,5
+ add r12d,ebx
+ xor r15d,eax
+
+ ror r14d,11
+ xor r13d,r10d
+ add r12d,r15d
+
+ mov r15d,ecx
+ add r12d,DWORD PTR[rbp]
+ xor r14d,ecx
+
+ xor r15d,edx
+ ror r13d,6
+ mov ebx,edx
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor ebx,edi
+ add r9d,r12d
+ add ebx,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ add ebx,r14d
+ mov r12d,DWORD PTR[60+rsi]
+ mov r13d,r9d
+ mov r14d,ebx
+ bswap r12d
+ ror r13d,14
+ mov edi,r10d
+
+ xor r13d,r9d
+ ror r14d,9
+ xor edi,r11d
+
+ mov DWORD PTR[60+rsp],r12d
+ xor r14d,ebx
+ and edi,r9d
+
+ ror r13d,5
+ add r12d,eax
+ xor edi,r11d
+
+ ror r14d,11
+ xor r13d,r9d
+ add r12d,edi
+
+ mov edi,ebx
+ add r12d,DWORD PTR[rbp]
+ xor r14d,ebx
+
+ xor edi,ecx
+ ror r13d,6
+ mov eax,ecx
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor eax,r15d
+ add r8d,r12d
+ add eax,r12d
+
+ lea rbp,QWORD PTR[20+rbp]
+ jmp $L$rounds_16_xx
+ALIGN 16
+$L$rounds_16_xx::
+ mov r13d,DWORD PTR[4+rsp]
+ mov r15d,DWORD PTR[56+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add eax,r14d
+ mov r14d,r15d
+ ror r15d,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ xor r12d,r13d
+ xor r15d,r14d
+ add r12d,DWORD PTR[36+rsp]
+
+ add r12d,DWORD PTR[rsp]
+ mov r13d,r8d
+ add r12d,r15d
+ mov r14d,eax
+ ror r13d,14
+ mov r15d,r9d
+
+ xor r13d,r8d
+ ror r14d,9
+ xor r15d,r10d
+
+ mov DWORD PTR[rsp],r12d
+ xor r14d,eax
+ and r15d,r8d
+
+ ror r13d,5
+ add r12d,r11d
+ xor r15d,r10d
+
+ ror r14d,11
+ xor r13d,r8d
+ add r12d,r15d
+
+ mov r15d,eax
+ add r12d,DWORD PTR[rbp]
+ xor r14d,eax
+
+ xor r15d,ebx
+ ror r13d,6
+ mov r11d,ebx
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor r11d,edi
+ add edx,r12d
+ add r11d,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ mov r13d,DWORD PTR[8+rsp]
+ mov edi,DWORD PTR[60+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add r11d,r14d
+ mov r14d,edi
+ ror edi,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor edi,r14d
+ shr r14d,10
+
+ ror edi,17
+ xor r12d,r13d
+ xor edi,r14d
+ add r12d,DWORD PTR[40+rsp]
+
+ add r12d,DWORD PTR[4+rsp]
+ mov r13d,edx
+ add r12d,edi
+ mov r14d,r11d
+ ror r13d,14
+ mov edi,r8d
+
+ xor r13d,edx
+ ror r14d,9
+ xor edi,r9d
+
+ mov DWORD PTR[4+rsp],r12d
+ xor r14d,r11d
+ and edi,edx
+
+ ror r13d,5
+ add r12d,r10d
+ xor edi,r9d
+
+ ror r14d,11
+ xor r13d,edx
+ add r12d,edi
+
+ mov edi,r11d
+ add r12d,DWORD PTR[rbp]
+ xor r14d,r11d
+
+ xor edi,eax
+ ror r13d,6
+ mov r10d,eax
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor r10d,r15d
+ add ecx,r12d
+ add r10d,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ mov r13d,DWORD PTR[12+rsp]
+ mov r15d,DWORD PTR[rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add r10d,r14d
+ mov r14d,r15d
+ ror r15d,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ xor r12d,r13d
+ xor r15d,r14d
+ add r12d,DWORD PTR[44+rsp]
+
+ add r12d,DWORD PTR[8+rsp]
+ mov r13d,ecx
+ add r12d,r15d
+ mov r14d,r10d
+ ror r13d,14
+ mov r15d,edx
+
+ xor r13d,ecx
+ ror r14d,9
+ xor r15d,r8d
+
+ mov DWORD PTR[8+rsp],r12d
+ xor r14d,r10d
+ and r15d,ecx
+
+ ror r13d,5
+ add r12d,r9d
+ xor r15d,r8d
+
+ ror r14d,11
+ xor r13d,ecx
+ add r12d,r15d
+
+ mov r15d,r10d
+ add r12d,DWORD PTR[rbp]
+ xor r14d,r10d
+
+ xor r15d,r11d
+ ror r13d,6
+ mov r9d,r11d
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor r9d,edi
+ add ebx,r12d
+ add r9d,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ mov r13d,DWORD PTR[16+rsp]
+ mov edi,DWORD PTR[4+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add r9d,r14d
+ mov r14d,edi
+ ror edi,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor edi,r14d
+ shr r14d,10
+
+ ror edi,17
+ xor r12d,r13d
+ xor edi,r14d
+ add r12d,DWORD PTR[48+rsp]
+
+ add r12d,DWORD PTR[12+rsp]
+ mov r13d,ebx
+ add r12d,edi
+ mov r14d,r9d
+ ror r13d,14
+ mov edi,ecx
+
+ xor r13d,ebx
+ ror r14d,9
+ xor edi,edx
+
+ mov DWORD PTR[12+rsp],r12d
+ xor r14d,r9d
+ and edi,ebx
+
+ ror r13d,5
+ add r12d,r8d
+ xor edi,edx
+
+ ror r14d,11
+ xor r13d,ebx
+ add r12d,edi
+
+ mov edi,r9d
+ add r12d,DWORD PTR[rbp]
+ xor r14d,r9d
+
+ xor edi,r10d
+ ror r13d,6
+ mov r8d,r10d
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor r8d,r15d
+ add eax,r12d
+ add r8d,r12d
+
+ lea rbp,QWORD PTR[20+rbp]
+ mov r13d,DWORD PTR[20+rsp]
+ mov r15d,DWORD PTR[8+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add r8d,r14d
+ mov r14d,r15d
+ ror r15d,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ xor r12d,r13d
+ xor r15d,r14d
+ add r12d,DWORD PTR[52+rsp]
+
+ add r12d,DWORD PTR[16+rsp]
+ mov r13d,eax
+ add r12d,r15d
+ mov r14d,r8d
+ ror r13d,14
+ mov r15d,ebx
+
+ xor r13d,eax
+ ror r14d,9
+ xor r15d,ecx
+
+ mov DWORD PTR[16+rsp],r12d
+ xor r14d,r8d
+ and r15d,eax
+
+ ror r13d,5
+ add r12d,edx
+ xor r15d,ecx
+
+ ror r14d,11
+ xor r13d,eax
+ add r12d,r15d
+
+ mov r15d,r8d
+ add r12d,DWORD PTR[rbp]
+ xor r14d,r8d
+
+ xor r15d,r9d
+ ror r13d,6
+ mov edx,r9d
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor edx,edi
+ add r11d,r12d
+ add edx,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ mov r13d,DWORD PTR[24+rsp]
+ mov edi,DWORD PTR[12+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add edx,r14d
+ mov r14d,edi
+ ror edi,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor edi,r14d
+ shr r14d,10
+
+ ror edi,17
+ xor r12d,r13d
+ xor edi,r14d
+ add r12d,DWORD PTR[56+rsp]
+
+ add r12d,DWORD PTR[20+rsp]
+ mov r13d,r11d
+ add r12d,edi
+ mov r14d,edx
+ ror r13d,14
+ mov edi,eax
+
+ xor r13d,r11d
+ ror r14d,9
+ xor edi,ebx
+
+ mov DWORD PTR[20+rsp],r12d
+ xor r14d,edx
+ and edi,r11d
+
+ ror r13d,5
+ add r12d,ecx
+ xor edi,ebx
+
+ ror r14d,11
+ xor r13d,r11d
+ add r12d,edi
+
+ mov edi,edx
+ add r12d,DWORD PTR[rbp]
+ xor r14d,edx
+
+ xor edi,r8d
+ ror r13d,6
+ mov ecx,r8d
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor ecx,r15d
+ add r10d,r12d
+ add ecx,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ mov r13d,DWORD PTR[28+rsp]
+ mov r15d,DWORD PTR[16+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add ecx,r14d
+ mov r14d,r15d
+ ror r15d,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ xor r12d,r13d
+ xor r15d,r14d
+ add r12d,DWORD PTR[60+rsp]
+
+ add r12d,DWORD PTR[24+rsp]
+ mov r13d,r10d
+ add r12d,r15d
+ mov r14d,ecx
+ ror r13d,14
+ mov r15d,r11d
+
+ xor r13d,r10d
+ ror r14d,9
+ xor r15d,eax
+
+ mov DWORD PTR[24+rsp],r12d
+ xor r14d,ecx
+ and r15d,r10d
+
+ ror r13d,5
+ add r12d,ebx
+ xor r15d,eax
+
+ ror r14d,11
+ xor r13d,r10d
+ add r12d,r15d
+
+ mov r15d,ecx
+ add r12d,DWORD PTR[rbp]
+ xor r14d,ecx
+
+ xor r15d,edx
+ ror r13d,6
+ mov ebx,edx
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor ebx,edi
+ add r9d,r12d
+ add ebx,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ mov r13d,DWORD PTR[32+rsp]
+ mov edi,DWORD PTR[20+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add ebx,r14d
+ mov r14d,edi
+ ror edi,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor edi,r14d
+ shr r14d,10
+
+ ror edi,17
+ xor r12d,r13d
+ xor edi,r14d
+ add r12d,DWORD PTR[rsp]
+
+ add r12d,DWORD PTR[28+rsp]
+ mov r13d,r9d
+ add r12d,edi
+ mov r14d,ebx
+ ror r13d,14
+ mov edi,r10d
+
+ xor r13d,r9d
+ ror r14d,9
+ xor edi,r11d
+
+ mov DWORD PTR[28+rsp],r12d
+ xor r14d,ebx
+ and edi,r9d
+
+ ror r13d,5
+ add r12d,eax
+ xor edi,r11d
+
+ ror r14d,11
+ xor r13d,r9d
+ add r12d,edi
+
+ mov edi,ebx
+ add r12d,DWORD PTR[rbp]
+ xor r14d,ebx
+
+ xor edi,ecx
+ ror r13d,6
+ mov eax,ecx
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor eax,r15d
+ add r8d,r12d
+ add eax,r12d
+
+ lea rbp,QWORD PTR[20+rbp]
+ mov r13d,DWORD PTR[36+rsp]
+ mov r15d,DWORD PTR[24+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add eax,r14d
+ mov r14d,r15d
+ ror r15d,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ xor r12d,r13d
+ xor r15d,r14d
+ add r12d,DWORD PTR[4+rsp]
+
+ add r12d,DWORD PTR[32+rsp]
+ mov r13d,r8d
+ add r12d,r15d
+ mov r14d,eax
+ ror r13d,14
+ mov r15d,r9d
+
+ xor r13d,r8d
+ ror r14d,9
+ xor r15d,r10d
+
+ mov DWORD PTR[32+rsp],r12d
+ xor r14d,eax
+ and r15d,r8d
+
+ ror r13d,5
+ add r12d,r11d
+ xor r15d,r10d
+
+ ror r14d,11
+ xor r13d,r8d
+ add r12d,r15d
+
+ mov r15d,eax
+ add r12d,DWORD PTR[rbp]
+ xor r14d,eax
+
+ xor r15d,ebx
+ ror r13d,6
+ mov r11d,ebx
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor r11d,edi
+ add edx,r12d
+ add r11d,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ mov r13d,DWORD PTR[40+rsp]
+ mov edi,DWORD PTR[28+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add r11d,r14d
+ mov r14d,edi
+ ror edi,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor edi,r14d
+ shr r14d,10
+
+ ror edi,17
+ xor r12d,r13d
+ xor edi,r14d
+ add r12d,DWORD PTR[8+rsp]
+
+ add r12d,DWORD PTR[36+rsp]
+ mov r13d,edx
+ add r12d,edi
+ mov r14d,r11d
+ ror r13d,14
+ mov edi,r8d
+
+ xor r13d,edx
+ ror r14d,9
+ xor edi,r9d
+
+ mov DWORD PTR[36+rsp],r12d
+ xor r14d,r11d
+ and edi,edx
+
+ ror r13d,5
+ add r12d,r10d
+ xor edi,r9d
+
+ ror r14d,11
+ xor r13d,edx
+ add r12d,edi
+
+ mov edi,r11d
+ add r12d,DWORD PTR[rbp]
+ xor r14d,r11d
+
+ xor edi,eax
+ ror r13d,6
+ mov r10d,eax
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor r10d,r15d
+ add ecx,r12d
+ add r10d,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ mov r13d,DWORD PTR[44+rsp]
+ mov r15d,DWORD PTR[32+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add r10d,r14d
+ mov r14d,r15d
+ ror r15d,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ xor r12d,r13d
+ xor r15d,r14d
+ add r12d,DWORD PTR[12+rsp]
+
+ add r12d,DWORD PTR[40+rsp]
+ mov r13d,ecx
+ add r12d,r15d
+ mov r14d,r10d
+ ror r13d,14
+ mov r15d,edx
+
+ xor r13d,ecx
+ ror r14d,9
+ xor r15d,r8d
+
+ mov DWORD PTR[40+rsp],r12d
+ xor r14d,r10d
+ and r15d,ecx
+
+ ror r13d,5
+ add r12d,r9d
+ xor r15d,r8d
+
+ ror r14d,11
+ xor r13d,ecx
+ add r12d,r15d
+
+ mov r15d,r10d
+ add r12d,DWORD PTR[rbp]
+ xor r14d,r10d
+
+ xor r15d,r11d
+ ror r13d,6
+ mov r9d,r11d
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor r9d,edi
+ add ebx,r12d
+ add r9d,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ mov r13d,DWORD PTR[48+rsp]
+ mov edi,DWORD PTR[36+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add r9d,r14d
+ mov r14d,edi
+ ror edi,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor edi,r14d
+ shr r14d,10
+
+ ror edi,17
+ xor r12d,r13d
+ xor edi,r14d
+ add r12d,DWORD PTR[16+rsp]
+
+ add r12d,DWORD PTR[44+rsp]
+ mov r13d,ebx
+ add r12d,edi
+ mov r14d,r9d
+ ror r13d,14
+ mov edi,ecx
+
+ xor r13d,ebx
+ ror r14d,9
+ xor edi,edx
+
+ mov DWORD PTR[44+rsp],r12d
+ xor r14d,r9d
+ and edi,ebx
+
+ ror r13d,5
+ add r12d,r8d
+ xor edi,edx
+
+ ror r14d,11
+ xor r13d,ebx
+ add r12d,edi
+
+ mov edi,r9d
+ add r12d,DWORD PTR[rbp]
+ xor r14d,r9d
+
+ xor edi,r10d
+ ror r13d,6
+ mov r8d,r10d
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor r8d,r15d
+ add eax,r12d
+ add r8d,r12d
+
+ lea rbp,QWORD PTR[20+rbp]
+ mov r13d,DWORD PTR[52+rsp]
+ mov r15d,DWORD PTR[40+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add r8d,r14d
+ mov r14d,r15d
+ ror r15d,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ xor r12d,r13d
+ xor r15d,r14d
+ add r12d,DWORD PTR[20+rsp]
+
+ add r12d,DWORD PTR[48+rsp]
+ mov r13d,eax
+ add r12d,r15d
+ mov r14d,r8d
+ ror r13d,14
+ mov r15d,ebx
+
+ xor r13d,eax
+ ror r14d,9
+ xor r15d,ecx
+
+ mov DWORD PTR[48+rsp],r12d
+ xor r14d,r8d
+ and r15d,eax
+
+ ror r13d,5
+ add r12d,edx
+ xor r15d,ecx
+
+ ror r14d,11
+ xor r13d,eax
+ add r12d,r15d
+
+ mov r15d,r8d
+ add r12d,DWORD PTR[rbp]
+ xor r14d,r8d
+
+ xor r15d,r9d
+ ror r13d,6
+ mov edx,r9d
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor edx,edi
+ add r11d,r12d
+ add edx,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ mov r13d,DWORD PTR[56+rsp]
+ mov edi,DWORD PTR[44+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add edx,r14d
+ mov r14d,edi
+ ror edi,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor edi,r14d
+ shr r14d,10
+
+ ror edi,17
+ xor r12d,r13d
+ xor edi,r14d
+ add r12d,DWORD PTR[24+rsp]
+
+ add r12d,DWORD PTR[52+rsp]
+ mov r13d,r11d
+ add r12d,edi
+ mov r14d,edx
+ ror r13d,14
+ mov edi,eax
+
+ xor r13d,r11d
+ ror r14d,9
+ xor edi,ebx
+
+ mov DWORD PTR[52+rsp],r12d
+ xor r14d,edx
+ and edi,r11d
+
+ ror r13d,5
+ add r12d,ecx
+ xor edi,ebx
+
+ ror r14d,11
+ xor r13d,r11d
+ add r12d,edi
+
+ mov edi,edx
+ add r12d,DWORD PTR[rbp]
+ xor r14d,edx
+
+ xor edi,r8d
+ ror r13d,6
+ mov ecx,r8d
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor ecx,r15d
+ add r10d,r12d
+ add ecx,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ mov r13d,DWORD PTR[60+rsp]
+ mov r15d,DWORD PTR[48+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add ecx,r14d
+ mov r14d,r15d
+ ror r15d,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ xor r12d,r13d
+ xor r15d,r14d
+ add r12d,DWORD PTR[28+rsp]
+
+ add r12d,DWORD PTR[56+rsp]
+ mov r13d,r10d
+ add r12d,r15d
+ mov r14d,ecx
+ ror r13d,14
+ mov r15d,r11d
+
+ xor r13d,r10d
+ ror r14d,9
+ xor r15d,eax
+
+ mov DWORD PTR[56+rsp],r12d
+ xor r14d,ecx
+ and r15d,r10d
+
+ ror r13d,5
+ add r12d,ebx
+ xor r15d,eax
+
+ ror r14d,11
+ xor r13d,r10d
+ add r12d,r15d
+
+ mov r15d,ecx
+ add r12d,DWORD PTR[rbp]
+ xor r14d,ecx
+
+ xor r15d,edx
+ ror r13d,6
+ mov ebx,edx
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor ebx,edi
+ add r9d,r12d
+ add ebx,r12d
+
+ lea rbp,QWORD PTR[4+rbp]
+ mov r13d,DWORD PTR[rsp]
+ mov edi,DWORD PTR[52+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add ebx,r14d
+ mov r14d,edi
+ ror edi,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor edi,r14d
+ shr r14d,10
+
+ ror edi,17
+ xor r12d,r13d
+ xor edi,r14d
+ add r12d,DWORD PTR[32+rsp]
+
+ add r12d,DWORD PTR[60+rsp]
+ mov r13d,r9d
+ add r12d,edi
+ mov r14d,ebx
+ ror r13d,14
+ mov edi,r10d
+
+ xor r13d,r9d
+ ror r14d,9
+ xor edi,r11d
+
+ mov DWORD PTR[60+rsp],r12d
+ xor r14d,ebx
+ and edi,r9d
+
+ ror r13d,5
+ add r12d,eax
+ xor edi,r11d
+
+ ror r14d,11
+ xor r13d,r9d
+ add r12d,edi
+
+ mov edi,ebx
+ add r12d,DWORD PTR[rbp]
+ xor r14d,ebx
+
+ xor edi,ecx
+ ror r13d,6
+ mov eax,ecx
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor eax,r15d
+ add r8d,r12d
+ add eax,r12d
+
+ lea rbp,QWORD PTR[20+rbp]
+ cmp BYTE PTR[3+rbp],0
+ jnz $L$rounds_16_xx
+
+ mov rdi,QWORD PTR[((64+0))+rsp]
+ add eax,r14d
+ lea rsi,QWORD PTR[64+rsi]
+
+ add eax,DWORD PTR[rdi]
+ add ebx,DWORD PTR[4+rdi]
+ add ecx,DWORD PTR[8+rdi]
+ add edx,DWORD PTR[12+rdi]
+ add r8d,DWORD PTR[16+rdi]
+ add r9d,DWORD PTR[20+rdi]
+ add r10d,DWORD PTR[24+rdi]
+ add r11d,DWORD PTR[28+rdi]
+
+ cmp rsi,QWORD PTR[((64+16))+rsp]
+
+ mov DWORD PTR[rdi],eax
+ mov DWORD PTR[4+rdi],ebx
+ mov DWORD PTR[8+rdi],ecx
+ mov DWORD PTR[12+rdi],edx
+ mov DWORD PTR[16+rdi],r8d
+ mov DWORD PTR[20+rdi],r9d
+ mov DWORD PTR[24+rdi],r10d
+ mov DWORD PTR[28+rdi],r11d
+ jb $L$loop
+
+ mov rsi,QWORD PTR[((64+24))+rsp]
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_sha256_block_data_order::
+sha256_block_data_order ENDP
+ALIGN 64
+
+K256::
+ DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h
+ DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h
+ DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h
+ DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h
+ DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h
+ DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h
+ DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h
+ DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h
+ DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch
+ DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch
+ DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah
+ DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah
+ DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h
+ DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h
+ DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h
+ DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h
+ DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h
+ DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h
+ DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h
+ DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h
+ DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h
+ DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h
+ DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h
+ DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h
+ DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h
+ DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h
+ DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h
+ DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h
+ DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h
+ DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h
+ DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h
+ DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h
+
+ DD 000010203h,004050607h,008090a0bh,00c0d0e0fh
+ DD 000010203h,004050607h,008090a0bh,00c0d0e0fh
+ DD 003020100h,00b0a0908h,0ffffffffh,0ffffffffh
+ DD 003020100h,00b0a0908h,0ffffffffh,0ffffffffh
+ DD 0ffffffffh,0ffffffffh,003020100h,00b0a0908h
+ DD 0ffffffffh,0ffffffffh,003020100h,00b0a0908h
+DB 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
+DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54
+DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
+DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
+DB 111,114,103,62,0
+
+ALIGN 64
+sha256_block_data_order_ssse3 PROC PRIVATE
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha256_block_data_order_ssse3::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+$L$ssse3_shortcut::
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ mov r11,rsp
+ shl rdx,4
+ sub rsp,160
+ lea rdx,QWORD PTR[rdx*4+rsi]
+ and rsp,-64
+ mov QWORD PTR[((64+0))+rsp],rdi
+ mov QWORD PTR[((64+8))+rsp],rsi
+ mov QWORD PTR[((64+16))+rsp],rdx
+ mov QWORD PTR[((64+24))+rsp],r11
+ movaps XMMWORD PTR[(64+32)+rsp],xmm6
+ movaps XMMWORD PTR[(64+48)+rsp],xmm7
+ movaps XMMWORD PTR[(64+64)+rsp],xmm8
+ movaps XMMWORD PTR[(64+80)+rsp],xmm9
+$L$prologue_ssse3::
+
+ mov eax,DWORD PTR[rdi]
+ mov ebx,DWORD PTR[4+rdi]
+ mov ecx,DWORD PTR[8+rdi]
+ mov edx,DWORD PTR[12+rdi]
+ mov r8d,DWORD PTR[16+rdi]
+ mov r9d,DWORD PTR[20+rdi]
+ mov r10d,DWORD PTR[24+rdi]
+ mov r11d,DWORD PTR[28+rdi]
+
+
+ jmp $L$loop_ssse3
+ALIGN 16
+$L$loop_ssse3::
+ movdqa xmm7,XMMWORD PTR[((K256+512))]
+ movdqu xmm0,XMMWORD PTR[rsi]
+ movdqu xmm1,XMMWORD PTR[16+rsi]
+ movdqu xmm2,XMMWORD PTR[32+rsi]
+DB 102,15,56,0,199
+ movdqu xmm3,XMMWORD PTR[48+rsi]
+ lea rbp,QWORD PTR[K256]
+DB 102,15,56,0,207
+ movdqa xmm4,XMMWORD PTR[rbp]
+ movdqa xmm5,XMMWORD PTR[32+rbp]
+DB 102,15,56,0,215
+ paddd xmm4,xmm0
+ movdqa xmm6,XMMWORD PTR[64+rbp]
+DB 102,15,56,0,223
+ movdqa xmm7,XMMWORD PTR[96+rbp]
+ paddd xmm5,xmm1
+ paddd xmm6,xmm2
+ paddd xmm7,xmm3
+ movdqa XMMWORD PTR[rsp],xmm4
+ mov r14d,eax
+ movdqa XMMWORD PTR[16+rsp],xmm5
+ mov edi,ebx
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ xor edi,ecx
+ movdqa XMMWORD PTR[48+rsp],xmm7
+ mov r13d,r8d
+ jmp $L$ssse3_00_47
+
+ALIGN 16
+$L$ssse3_00_47::
+ sub rbp,-128
+ ror r13d,14
+ movdqa xmm4,xmm1
+ mov eax,r14d
+ mov r12d,r9d
+ movdqa xmm7,xmm3
+ ror r14d,9
+ xor r13d,r8d
+ xor r12d,r10d
+ ror r13d,5
+ xor r14d,eax
+DB 102,15,58,15,224,4
+ and r12d,r8d
+ xor r13d,r8d
+DB 102,15,58,15,250,4
+ add r11d,DWORD PTR[rsp]
+ mov r15d,eax
+ xor r12d,r10d
+ ror r14d,11
+ movdqa xmm5,xmm4
+ xor r15d,ebx
+ add r11d,r12d
+ movdqa xmm6,xmm4
+ ror r13d,6
+ and edi,r15d
+ psrld xmm4,3
+ xor r14d,eax
+ add r11d,r13d
+ xor edi,ebx
+ paddd xmm0,xmm7
+ ror r14d,2
+ add edx,r11d
+ psrld xmm6,7
+ add r11d,edi
+ mov r13d,edx
+ pshufd xmm7,xmm3,250
+ add r14d,r11d
+ ror r13d,14
+ pslld xmm5,14
+ mov r11d,r14d
+ mov r12d,r8d
+ pxor xmm4,xmm6
+ ror r14d,9
+ xor r13d,edx
+ xor r12d,r9d
+ ror r13d,5
+ psrld xmm6,11
+ xor r14d,r11d
+ pxor xmm4,xmm5
+ and r12d,edx
+ xor r13d,edx
+ pslld xmm5,11
+ add r10d,DWORD PTR[4+rsp]
+ mov edi,r11d
+ pxor xmm4,xmm6
+ xor r12d,r9d
+ ror r14d,11
+ movdqa xmm6,xmm7
+ xor edi,eax
+ add r10d,r12d
+ pxor xmm4,xmm5
+ ror r13d,6
+ and r15d,edi
+ xor r14d,r11d
+ psrld xmm7,10
+ add r10d,r13d
+ xor r15d,eax
+ paddd xmm0,xmm4
+ ror r14d,2
+ add ecx,r10d
+ psrlq xmm6,17
+ add r10d,r15d
+ mov r13d,ecx
+ add r14d,r10d
+ pxor xmm7,xmm6
+ ror r13d,14
+ mov r10d,r14d
+ mov r12d,edx
+ ror r14d,9
+ psrlq xmm6,2
+ xor r13d,ecx
+ xor r12d,r8d
+ pxor xmm7,xmm6
+ ror r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ pshufd xmm7,xmm7,128
+ xor r13d,ecx
+ add r9d,DWORD PTR[8+rsp]
+ mov r15d,r10d
+ psrldq xmm7,8
+ xor r12d,r8d
+ ror r14d,11
+ xor r15d,r11d
+ add r9d,r12d
+ ror r13d,6
+ paddd xmm0,xmm7
+ and edi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ pshufd xmm7,xmm0,80
+ xor edi,r11d
+ ror r14d,2
+ add ebx,r9d
+ movdqa xmm6,xmm7
+ add r9d,edi
+ mov r13d,ebx
+ psrld xmm7,10
+ add r14d,r9d
+ ror r13d,14
+ psrlq xmm6,17
+ mov r9d,r14d
+ mov r12d,ecx
+ pxor xmm7,xmm6
+ ror r14d,9
+ xor r13d,ebx
+ xor r12d,edx
+ ror r13d,5
+ xor r14d,r9d
+ psrlq xmm6,2
+ and r12d,ebx
+ xor r13d,ebx
+ add r8d,DWORD PTR[12+rsp]
+ pxor xmm7,xmm6
+ mov edi,r9d
+ xor r12d,edx
+ ror r14d,11
+ pshufd xmm7,xmm7,8
+ xor edi,r10d
+ add r8d,r12d
+ movdqa xmm6,XMMWORD PTR[rbp]
+ ror r13d,6
+ and r15d,edi
+ pslldq xmm7,8
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ paddd xmm0,xmm7
+ ror r14d,2
+ add eax,r8d
+ add r8d,r15d
+ paddd xmm6,xmm0
+ mov r13d,eax
+ add r14d,r8d
+ movdqa XMMWORD PTR[rsp],xmm6
+ ror r13d,14
+ movdqa xmm4,xmm2
+ mov r8d,r14d
+ mov r12d,ebx
+ movdqa xmm7,xmm0
+ ror r14d,9
+ xor r13d,eax
+ xor r12d,ecx
+ ror r13d,5
+ xor r14d,r8d
+DB 102,15,58,15,225,4
+ and r12d,eax
+ xor r13d,eax
+DB 102,15,58,15,251,4
+ add edx,DWORD PTR[16+rsp]
+ mov r15d,r8d
+ xor r12d,ecx
+ ror r14d,11
+ movdqa xmm5,xmm4
+ xor r15d,r9d
+ add edx,r12d
+ movdqa xmm6,xmm4
+ ror r13d,6
+ and edi,r15d
+ psrld xmm4,3
+ xor r14d,r8d
+ add edx,r13d
+ xor edi,r9d
+ paddd xmm1,xmm7
+ ror r14d,2
+ add r11d,edx
+ psrld xmm6,7
+ add edx,edi
+ mov r13d,r11d
+ pshufd xmm7,xmm0,250
+ add r14d,edx
+ ror r13d,14
+ pslld xmm5,14
+ mov edx,r14d
+ mov r12d,eax
+ pxor xmm4,xmm6
+ ror r14d,9
+ xor r13d,r11d
+ xor r12d,ebx
+ ror r13d,5
+ psrld xmm6,11
+ xor r14d,edx
+ pxor xmm4,xmm5
+ and r12d,r11d
+ xor r13d,r11d
+ pslld xmm5,11
+ add ecx,DWORD PTR[20+rsp]
+ mov edi,edx
+ pxor xmm4,xmm6
+ xor r12d,ebx
+ ror r14d,11
+ movdqa xmm6,xmm7
+ xor edi,r8d
+ add ecx,r12d
+ pxor xmm4,xmm5
+ ror r13d,6
+ and r15d,edi
+ xor r14d,edx
+ psrld xmm7,10
+ add ecx,r13d
+ xor r15d,r8d
+ paddd xmm1,xmm4
+ ror r14d,2
+ add r10d,ecx
+ psrlq xmm6,17
+ add ecx,r15d
+ mov r13d,r10d
+ add r14d,ecx
+ pxor xmm7,xmm6
+ ror r13d,14
+ mov ecx,r14d
+ mov r12d,r11d
+ ror r14d,9
+ psrlq xmm6,2
+ xor r13d,r10d
+ xor r12d,eax
+ pxor xmm7,xmm6
+ ror r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ pshufd xmm7,xmm7,128
+ xor r13d,r10d
+ add ebx,DWORD PTR[24+rsp]
+ mov r15d,ecx
+ psrldq xmm7,8
+ xor r12d,eax
+ ror r14d,11
+ xor r15d,edx
+ add ebx,r12d
+ ror r13d,6
+ paddd xmm1,xmm7
+ and edi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ pshufd xmm7,xmm1,80
+ xor edi,edx
+ ror r14d,2
+ add r9d,ebx
+ movdqa xmm6,xmm7
+ add ebx,edi
+ mov r13d,r9d
+ psrld xmm7,10
+ add r14d,ebx
+ ror r13d,14
+ psrlq xmm6,17
+ mov ebx,r14d
+ mov r12d,r10d
+ pxor xmm7,xmm6
+ ror r14d,9
+ xor r13d,r9d
+ xor r12d,r11d
+ ror r13d,5
+ xor r14d,ebx
+ psrlq xmm6,2
+ and r12d,r9d
+ xor r13d,r9d
+ add eax,DWORD PTR[28+rsp]
+ pxor xmm7,xmm6
+ mov edi,ebx
+ xor r12d,r11d
+ ror r14d,11
+ pshufd xmm7,xmm7,8
+ xor edi,ecx
+ add eax,r12d
+ movdqa xmm6,XMMWORD PTR[32+rbp]
+ ror r13d,6
+ and r15d,edi
+ pslldq xmm7,8
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ paddd xmm1,xmm7
+ ror r14d,2
+ add r8d,eax
+ add eax,r15d
+ paddd xmm6,xmm1
+ mov r13d,r8d
+ add r14d,eax
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ ror r13d,14
+ movdqa xmm4,xmm3
+ mov eax,r14d
+ mov r12d,r9d
+ movdqa xmm7,xmm1
+ ror r14d,9
+ xor r13d,r8d
+ xor r12d,r10d
+ ror r13d,5
+ xor r14d,eax
+DB 102,15,58,15,226,4
+ and r12d,r8d
+ xor r13d,r8d
+DB 102,15,58,15,248,4
+ add r11d,DWORD PTR[32+rsp]
+ mov r15d,eax
+ xor r12d,r10d
+ ror r14d,11
+ movdqa xmm5,xmm4
+ xor r15d,ebx
+ add r11d,r12d
+ movdqa xmm6,xmm4
+ ror r13d,6
+ and edi,r15d
+ psrld xmm4,3
+ xor r14d,eax
+ add r11d,r13d
+ xor edi,ebx
+ paddd xmm2,xmm7
+ ror r14d,2
+ add edx,r11d
+ psrld xmm6,7
+ add r11d,edi
+ mov r13d,edx
+ pshufd xmm7,xmm1,250
+ add r14d,r11d
+ ror r13d,14
+ pslld xmm5,14
+ mov r11d,r14d
+ mov r12d,r8d
+ pxor xmm4,xmm6
+ ror r14d,9
+ xor r13d,edx
+ xor r12d,r9d
+ ror r13d,5
+ psrld xmm6,11
+ xor r14d,r11d
+ pxor xmm4,xmm5
+ and r12d,edx
+ xor r13d,edx
+ pslld xmm5,11
+ add r10d,DWORD PTR[36+rsp]
+ mov edi,r11d
+ pxor xmm4,xmm6
+ xor r12d,r9d
+ ror r14d,11
+ movdqa xmm6,xmm7
+ xor edi,eax
+ add r10d,r12d
+ pxor xmm4,xmm5
+ ror r13d,6
+ and r15d,edi
+ xor r14d,r11d
+ psrld xmm7,10
+ add r10d,r13d
+ xor r15d,eax
+ paddd xmm2,xmm4
+ ror r14d,2
+ add ecx,r10d
+ psrlq xmm6,17
+ add r10d,r15d
+ mov r13d,ecx
+ add r14d,r10d
+ pxor xmm7,xmm6
+ ror r13d,14
+ mov r10d,r14d
+ mov r12d,edx
+ ror r14d,9
+ psrlq xmm6,2
+ xor r13d,ecx
+ xor r12d,r8d
+ pxor xmm7,xmm6
+ ror r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ pshufd xmm7,xmm7,128
+ xor r13d,ecx
+ add r9d,DWORD PTR[40+rsp]
+ mov r15d,r10d
+ psrldq xmm7,8
+ xor r12d,r8d
+ ror r14d,11
+ xor r15d,r11d
+ add r9d,r12d
+ ror r13d,6
+ paddd xmm2,xmm7
+ and edi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ pshufd xmm7,xmm2,80
+ xor edi,r11d
+ ror r14d,2
+ add ebx,r9d
+ movdqa xmm6,xmm7
+ add r9d,edi
+ mov r13d,ebx
+ psrld xmm7,10
+ add r14d,r9d
+ ror r13d,14
+ psrlq xmm6,17
+ mov r9d,r14d
+ mov r12d,ecx
+ pxor xmm7,xmm6
+ ror r14d,9
+ xor r13d,ebx
+ xor r12d,edx
+ ror r13d,5
+ xor r14d,r9d
+ psrlq xmm6,2
+ and r12d,ebx
+ xor r13d,ebx
+ add r8d,DWORD PTR[44+rsp]
+ pxor xmm7,xmm6
+ mov edi,r9d
+ xor r12d,edx
+ ror r14d,11
+ pshufd xmm7,xmm7,8
+ xor edi,r10d
+ add r8d,r12d
+ movdqa xmm6,XMMWORD PTR[64+rbp]
+ ror r13d,6
+ and r15d,edi
+ pslldq xmm7,8
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ paddd xmm2,xmm7
+ ror r14d,2
+ add eax,r8d
+ add r8d,r15d
+ paddd xmm6,xmm2
+ mov r13d,eax
+ add r14d,r8d
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ ror r13d,14
+ movdqa xmm4,xmm0
+ mov r8d,r14d
+ mov r12d,ebx
+ movdqa xmm7,xmm2
+ ror r14d,9
+ xor r13d,eax
+ xor r12d,ecx
+ ror r13d,5
+ xor r14d,r8d
+DB 102,15,58,15,227,4
+ and r12d,eax
+ xor r13d,eax
+DB 102,15,58,15,249,4
+ add edx,DWORD PTR[48+rsp]
+ mov r15d,r8d
+ xor r12d,ecx
+ ror r14d,11
+ movdqa xmm5,xmm4
+ xor r15d,r9d
+ add edx,r12d
+ movdqa xmm6,xmm4
+ ror r13d,6
+ and edi,r15d
+ psrld xmm4,3
+ xor r14d,r8d
+ add edx,r13d
+ xor edi,r9d
+ paddd xmm3,xmm7
+ ror r14d,2
+ add r11d,edx
+ psrld xmm6,7
+ add edx,edi
+ mov r13d,r11d
+ pshufd xmm7,xmm2,250
+ add r14d,edx
+ ror r13d,14
+ pslld xmm5,14
+ mov edx,r14d
+ mov r12d,eax
+ pxor xmm4,xmm6
+ ror r14d,9
+ xor r13d,r11d
+ xor r12d,ebx
+ ror r13d,5
+ psrld xmm6,11
+ xor r14d,edx
+ pxor xmm4,xmm5
+ and r12d,r11d
+ xor r13d,r11d
+ pslld xmm5,11
+ add ecx,DWORD PTR[52+rsp]
+ mov edi,edx
+ pxor xmm4,xmm6
+ xor r12d,ebx
+ ror r14d,11
+ movdqa xmm6,xmm7
+ xor edi,r8d
+ add ecx,r12d
+ pxor xmm4,xmm5
+ ror r13d,6
+ and r15d,edi
+ xor r14d,edx
+ psrld xmm7,10
+ add ecx,r13d
+ xor r15d,r8d
+ paddd xmm3,xmm4
+ ror r14d,2
+ add r10d,ecx
+ psrlq xmm6,17
+ add ecx,r15d
+ mov r13d,r10d
+ add r14d,ecx
+ pxor xmm7,xmm6
+ ror r13d,14
+ mov ecx,r14d
+ mov r12d,r11d
+ ror r14d,9
+ psrlq xmm6,2
+ xor r13d,r10d
+ xor r12d,eax
+ pxor xmm7,xmm6
+ ror r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ pshufd xmm7,xmm7,128
+ xor r13d,r10d
+ add ebx,DWORD PTR[56+rsp]
+ mov r15d,ecx
+ psrldq xmm7,8
+ xor r12d,eax
+ ror r14d,11
+ xor r15d,edx
+ add ebx,r12d
+ ror r13d,6
+ paddd xmm3,xmm7
+ and edi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ pshufd xmm7,xmm3,80
+ xor edi,edx
+ ror r14d,2
+ add r9d,ebx
+ movdqa xmm6,xmm7
+ add ebx,edi
+ mov r13d,r9d
+ psrld xmm7,10
+ add r14d,ebx
+ ror r13d,14
+ psrlq xmm6,17
+ mov ebx,r14d
+ mov r12d,r10d
+ pxor xmm7,xmm6
+ ror r14d,9
+ xor r13d,r9d
+ xor r12d,r11d
+ ror r13d,5
+ xor r14d,ebx
+ psrlq xmm6,2
+ and r12d,r9d
+ xor r13d,r9d
+ add eax,DWORD PTR[60+rsp]
+ pxor xmm7,xmm6
+ mov edi,ebx
+ xor r12d,r11d
+ ror r14d,11
+ pshufd xmm7,xmm7,8
+ xor edi,ecx
+ add eax,r12d
+ movdqa xmm6,XMMWORD PTR[96+rbp]
+ ror r13d,6
+ and r15d,edi
+ pslldq xmm7,8
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ paddd xmm3,xmm7
+ ror r14d,2
+ add r8d,eax
+ add eax,r15d
+ paddd xmm6,xmm3
+ mov r13d,r8d
+ add r14d,eax
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ cmp BYTE PTR[131+rbp],0
+ jne $L$ssse3_00_47
+ ror r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ ror r14d,9
+ xor r13d,r8d
+ xor r12d,r10d
+ ror r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ xor r13d,r8d
+ add r11d,DWORD PTR[rsp]
+ mov r15d,eax
+ xor r12d,r10d
+ ror r14d,11
+ xor r15d,ebx
+ add r11d,r12d
+ ror r13d,6
+ and edi,r15d
+ xor r14d,eax
+ add r11d,r13d
+ xor edi,ebx
+ ror r14d,2
+ add edx,r11d
+ add r11d,edi
+ mov r13d,edx
+ add r14d,r11d
+ ror r13d,14
+ mov r11d,r14d
+ mov r12d,r8d
+ ror r14d,9
+ xor r13d,edx
+ xor r12d,r9d
+ ror r13d,5
+ xor r14d,r11d
+ and r12d,edx
+ xor r13d,edx
+ add r10d,DWORD PTR[4+rsp]
+ mov edi,r11d
+ xor r12d,r9d
+ ror r14d,11
+ xor edi,eax
+ add r10d,r12d
+ ror r13d,6
+ and r15d,edi
+ xor r14d,r11d
+ add r10d,r13d
+ xor r15d,eax
+ ror r14d,2
+ add ecx,r10d
+ add r10d,r15d
+ mov r13d,ecx
+ add r14d,r10d
+ ror r13d,14
+ mov r10d,r14d
+ mov r12d,edx
+ ror r14d,9
+ xor r13d,ecx
+ xor r12d,r8d
+ ror r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ xor r13d,ecx
+ add r9d,DWORD PTR[8+rsp]
+ mov r15d,r10d
+ xor r12d,r8d
+ ror r14d,11
+ xor r15d,r11d
+ add r9d,r12d
+ ror r13d,6
+ and edi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ xor edi,r11d
+ ror r14d,2
+ add ebx,r9d
+ add r9d,edi
+ mov r13d,ebx
+ add r14d,r9d
+ ror r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ ror r14d,9
+ xor r13d,ebx
+ xor r12d,edx
+ ror r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ xor r13d,ebx
+ add r8d,DWORD PTR[12+rsp]
+ mov edi,r9d
+ xor r12d,edx
+ ror r14d,11
+ xor edi,r10d
+ add r8d,r12d
+ ror r13d,6
+ and r15d,edi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ ror r14d,2
+ add eax,r8d
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ ror r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ ror r14d,9
+ xor r13d,eax
+ xor r12d,ecx
+ ror r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ xor r13d,eax
+ add edx,DWORD PTR[16+rsp]
+ mov r15d,r8d
+ xor r12d,ecx
+ ror r14d,11
+ xor r15d,r9d
+ add edx,r12d
+ ror r13d,6
+ and edi,r15d
+ xor r14d,r8d
+ add edx,r13d
+ xor edi,r9d
+ ror r14d,2
+ add r11d,edx
+ add edx,edi
+ mov r13d,r11d
+ add r14d,edx
+ ror r13d,14
+ mov edx,r14d
+ mov r12d,eax
+ ror r14d,9
+ xor r13d,r11d
+ xor r12d,ebx
+ ror r13d,5
+ xor r14d,edx
+ and r12d,r11d
+ xor r13d,r11d
+ add ecx,DWORD PTR[20+rsp]
+ mov edi,edx
+ xor r12d,ebx
+ ror r14d,11
+ xor edi,r8d
+ add ecx,r12d
+ ror r13d,6
+ and r15d,edi
+ xor r14d,edx
+ add ecx,r13d
+ xor r15d,r8d
+ ror r14d,2
+ add r10d,ecx
+ add ecx,r15d
+ mov r13d,r10d
+ add r14d,ecx
+ ror r13d,14
+ mov ecx,r14d
+ mov r12d,r11d
+ ror r14d,9
+ xor r13d,r10d
+ xor r12d,eax
+ ror r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ xor r13d,r10d
+ add ebx,DWORD PTR[24+rsp]
+ mov r15d,ecx
+ xor r12d,eax
+ ror r14d,11
+ xor r15d,edx
+ add ebx,r12d
+ ror r13d,6
+ and edi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ xor edi,edx
+ ror r14d,2
+ add r9d,ebx
+ add ebx,edi
+ mov r13d,r9d
+ add r14d,ebx
+ ror r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ ror r14d,9
+ xor r13d,r9d
+ xor r12d,r11d
+ ror r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ xor r13d,r9d
+ add eax,DWORD PTR[28+rsp]
+ mov edi,ebx
+ xor r12d,r11d
+ ror r14d,11
+ xor edi,ecx
+ add eax,r12d
+ ror r13d,6
+ and r15d,edi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ ror r14d,2
+ add r8d,eax
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ ror r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ ror r14d,9
+ xor r13d,r8d
+ xor r12d,r10d
+ ror r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ xor r13d,r8d
+ add r11d,DWORD PTR[32+rsp]
+ mov r15d,eax
+ xor r12d,r10d
+ ror r14d,11
+ xor r15d,ebx
+ add r11d,r12d
+ ror r13d,6
+ and edi,r15d
+ xor r14d,eax
+ add r11d,r13d
+ xor edi,ebx
+ ror r14d,2
+ add edx,r11d
+ add r11d,edi
+ mov r13d,edx
+ add r14d,r11d
+ ror r13d,14
+ mov r11d,r14d
+ mov r12d,r8d
+ ror r14d,9
+ xor r13d,edx
+ xor r12d,r9d
+ ror r13d,5
+ xor r14d,r11d
+ and r12d,edx
+ xor r13d,edx
+ add r10d,DWORD PTR[36+rsp]
+ mov edi,r11d
+ xor r12d,r9d
+ ror r14d,11
+ xor edi,eax
+ add r10d,r12d
+ ror r13d,6
+ and r15d,edi
+ xor r14d,r11d
+ add r10d,r13d
+ xor r15d,eax
+ ror r14d,2
+ add ecx,r10d
+ add r10d,r15d
+ mov r13d,ecx
+ add r14d,r10d
+ ror r13d,14
+ mov r10d,r14d
+ mov r12d,edx
+ ror r14d,9
+ xor r13d,ecx
+ xor r12d,r8d
+ ror r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ xor r13d,ecx
+ add r9d,DWORD PTR[40+rsp]
+ mov r15d,r10d
+ xor r12d,r8d
+ ror r14d,11
+ xor r15d,r11d
+ add r9d,r12d
+ ror r13d,6
+ and edi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ xor edi,r11d
+ ror r14d,2
+ add ebx,r9d
+ add r9d,edi
+ mov r13d,ebx
+ add r14d,r9d
+ ror r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ ror r14d,9
+ xor r13d,ebx
+ xor r12d,edx
+ ror r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ xor r13d,ebx
+ add r8d,DWORD PTR[44+rsp]
+ mov edi,r9d
+ xor r12d,edx
+ ror r14d,11
+ xor edi,r10d
+ add r8d,r12d
+ ror r13d,6
+ and r15d,edi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ ror r14d,2
+ add eax,r8d
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ ror r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ ror r14d,9
+ xor r13d,eax
+ xor r12d,ecx
+ ror r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ xor r13d,eax
+ add edx,DWORD PTR[48+rsp]
+ mov r15d,r8d
+ xor r12d,ecx
+ ror r14d,11
+ xor r15d,r9d
+ add edx,r12d
+ ror r13d,6
+ and edi,r15d
+ xor r14d,r8d
+ add edx,r13d
+ xor edi,r9d
+ ror r14d,2
+ add r11d,edx
+ add edx,edi
+ mov r13d,r11d
+ add r14d,edx
+ ror r13d,14
+ mov edx,r14d
+ mov r12d,eax
+ ror r14d,9
+ xor r13d,r11d
+ xor r12d,ebx
+ ror r13d,5
+ xor r14d,edx
+ and r12d,r11d
+ xor r13d,r11d
+ add ecx,DWORD PTR[52+rsp]
+ mov edi,edx
+ xor r12d,ebx
+ ror r14d,11
+ xor edi,r8d
+ add ecx,r12d
+ ror r13d,6
+ and r15d,edi
+ xor r14d,edx
+ add ecx,r13d
+ xor r15d,r8d
+ ror r14d,2
+ add r10d,ecx
+ add ecx,r15d
+ mov r13d,r10d
+ add r14d,ecx
+ ror r13d,14
+ mov ecx,r14d
+ mov r12d,r11d
+ ror r14d,9
+ xor r13d,r10d
+ xor r12d,eax
+ ror r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ xor r13d,r10d
+ add ebx,DWORD PTR[56+rsp]
+ mov r15d,ecx
+ xor r12d,eax
+ ror r14d,11
+ xor r15d,edx
+ add ebx,r12d
+ ror r13d,6
+ and edi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ xor edi,edx
+ ror r14d,2
+ add r9d,ebx
+ add ebx,edi
+ mov r13d,r9d
+ add r14d,ebx
+ ror r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ ror r14d,9
+ xor r13d,r9d
+ xor r12d,r11d
+ ror r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ xor r13d,r9d
+ add eax,DWORD PTR[60+rsp]
+ mov edi,ebx
+ xor r12d,r11d
+ ror r14d,11
+ xor edi,ecx
+ add eax,r12d
+ ror r13d,6
+ and r15d,edi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ ror r14d,2
+ add r8d,eax
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ mov rdi,QWORD PTR[((64+0))+rsp]
+ mov eax,r14d
+
+ add eax,DWORD PTR[rdi]
+ lea rsi,QWORD PTR[64+rsi]
+ add ebx,DWORD PTR[4+rdi]
+ add ecx,DWORD PTR[8+rdi]
+ add edx,DWORD PTR[12+rdi]
+ add r8d,DWORD PTR[16+rdi]
+ add r9d,DWORD PTR[20+rdi]
+ add r10d,DWORD PTR[24+rdi]
+ add r11d,DWORD PTR[28+rdi]
+
+ cmp rsi,QWORD PTR[((64+16))+rsp]
+
+ mov DWORD PTR[rdi],eax
+ mov DWORD PTR[4+rdi],ebx
+ mov DWORD PTR[8+rdi],ecx
+ mov DWORD PTR[12+rdi],edx
+ mov DWORD PTR[16+rdi],r8d
+ mov DWORD PTR[20+rdi],r9d
+ mov DWORD PTR[24+rdi],r10d
+ mov DWORD PTR[28+rdi],r11d
+ jb $L$loop_ssse3
+
+ mov rsi,QWORD PTR[((64+24))+rsp]
+ movaps xmm6,XMMWORD PTR[((64+32))+rsp]
+ movaps xmm7,XMMWORD PTR[((64+48))+rsp]
+ movaps xmm8,XMMWORD PTR[((64+64))+rsp]
+ movaps xmm9,XMMWORD PTR[((64+80))+rsp]
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$epilogue_ssse3::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_sha256_block_data_order_ssse3::
+sha256_block_data_order_ssse3 ENDP
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$in_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_prologue
+ mov rsi,rax
+ mov rax,QWORD PTR[((64+24))+rax]
+ lea rax,QWORD PTR[48+rax]
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+ lea r10,QWORD PTR[$L$epilogue]
+ cmp rbx,r10
+ jb $L$in_prologue
+
+ lea rsi,QWORD PTR[((64+32))+rsi]
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,8
+ DD 0a548f3fch
+
+$L$in_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+se_handler ENDP
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_sha256_block_data_order
+ DD imagerel $L$SEH_end_sha256_block_data_order
+ DD imagerel $L$SEH_info_sha256_block_data_order
+ DD imagerel $L$SEH_begin_sha256_block_data_order_ssse3
+ DD imagerel $L$SEH_end_sha256_block_data_order_ssse3
+ DD imagerel $L$SEH_info_sha256_block_data_order_ssse3
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_sha256_block_data_order::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$prologue,imagerel $L$epilogue
+$L$SEH_info_sha256_block_data_order_ssse3::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3
+
+.xdata ENDS
+END
diff --git a/win-x86_64/crypto/sha/sha512-x86_64.asm b/win-x86_64/crypto/sha/sha512-x86_64.asm
new file mode 100644
index 0000000..e993c3c
--- /dev/null
+++ b/win-x86_64/crypto/sha/sha512-x86_64.asm
@@ -0,0 +1,1913 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+
+EXTERN OPENSSL_ia32cap_P:NEAR
+PUBLIC sha512_block_data_order
+
+ALIGN 16
+sha512_block_data_order PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha512_block_data_order::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ mov r11,rsp
+ shl rdx,4
+ sub rsp,16*8+4*8
+ lea rdx,QWORD PTR[rdx*8+rsi]
+ and rsp,-64
+ mov QWORD PTR[((128+0))+rsp],rdi
+ mov QWORD PTR[((128+8))+rsp],rsi
+ mov QWORD PTR[((128+16))+rsp],rdx
+ mov QWORD PTR[((128+24))+rsp],r11
+$L$prologue::
+
+ mov rax,QWORD PTR[rdi]
+ mov rbx,QWORD PTR[8+rdi]
+ mov rcx,QWORD PTR[16+rdi]
+ mov rdx,QWORD PTR[24+rdi]
+ mov r8,QWORD PTR[32+rdi]
+ mov r9,QWORD PTR[40+rdi]
+ mov r10,QWORD PTR[48+rdi]
+ mov r11,QWORD PTR[56+rdi]
+ jmp $L$loop
+
+ALIGN 16
+$L$loop::
+ mov rdi,rbx
+ lea rbp,QWORD PTR[K512]
+ xor rdi,rcx
+ mov r12,QWORD PTR[rsi]
+ mov r13,r8
+ mov r14,rax
+ bswap r12
+ ror r13,23
+ mov r15,r9
+
+ xor r13,r8
+ ror r14,5
+ xor r15,r10
+
+ mov QWORD PTR[rsp],r12
+ xor r14,rax
+ and r15,r8
+
+ ror r13,4
+ add r12,r11
+ xor r15,r10
+
+ ror r14,6
+ xor r13,r8
+ add r12,r15
+
+ mov r15,rax
+ add r12,QWORD PTR[rbp]
+ xor r14,rax
+
+ xor r15,rbx
+ ror r13,14
+ mov r11,rbx
+
+ and rdi,r15
+ ror r14,28
+ add r12,r13
+
+ xor r11,rdi
+ add rdx,r12
+ add r11,r12
+
+ lea rbp,QWORD PTR[8+rbp]
+ add r11,r14
+ mov r12,QWORD PTR[8+rsi]
+ mov r13,rdx
+ mov r14,r11
+ bswap r12
+ ror r13,23
+ mov rdi,r8
+
+ xor r13,rdx
+ ror r14,5
+ xor rdi,r9
+
+ mov QWORD PTR[8+rsp],r12
+ xor r14,r11
+ and rdi,rdx
+
+ ror r13,4
+ add r12,r10
+ xor rdi,r9
+
+ ror r14,6
+ xor r13,rdx
+ add r12,rdi
+
+ mov rdi,r11
+ add r12,QWORD PTR[rbp]
+ xor r14,r11
+
+ xor rdi,rax
+ ror r13,14
+ mov r10,rax
+
+ and r15,rdi
+ ror r14,28
+ add r12,r13
+
+ xor r10,r15
+ add rcx,r12
+ add r10,r12
+
+ lea rbp,QWORD PTR[24+rbp]
+ add r10,r14
+ mov r12,QWORD PTR[16+rsi]
+ mov r13,rcx
+ mov r14,r10
+ bswap r12
+ ror r13,23
+ mov r15,rdx
+
+ xor r13,rcx
+ ror r14,5
+ xor r15,r8
+
+ mov QWORD PTR[16+rsp],r12
+ xor r14,r10
+ and r15,rcx
+
+ ror r13,4
+ add r12,r9
+ xor r15,r8
+
+ ror r14,6
+ xor r13,rcx
+ add r12,r15
+
+ mov r15,r10
+ add r12,QWORD PTR[rbp]
+ xor r14,r10
+
+ xor r15,r11
+ ror r13,14
+ mov r9,r11
+
+ and rdi,r15
+ ror r14,28
+ add r12,r13
+
+ xor r9,rdi
+ add rbx,r12
+ add r9,r12
+
+ lea rbp,QWORD PTR[8+rbp]
+ add r9,r14
+ mov r12,QWORD PTR[24+rsi]
+ mov r13,rbx
+ mov r14,r9
+ bswap r12
+ ror r13,23
+ mov rdi,rcx
+
+ xor r13,rbx
+ ror r14,5
+ xor rdi,rdx
+
+ mov QWORD PTR[24+rsp],r12
+ xor r14,r9
+ and rdi,rbx
+
+ ror r13,4
+ add r12,r8
+ xor rdi,rdx
+
+ ror r14,6
+ xor r13,rbx
+ add r12,rdi
+
+ mov rdi,r9
+ add r12,QWORD PTR[rbp]
+ xor r14,r9
+
+ xor rdi,r10
+ ror r13,14
+ mov r8,r10
+
+ and r15,rdi
+ ror r14,28
+ add r12,r13
+
+ xor r8,r15
+ add rax,r12
+ add r8,r12
+
+ lea rbp,QWORD PTR[24+rbp]
+ add r8,r14
+ mov r12,QWORD PTR[32+rsi]
+ mov r13,rax
+ mov r14,r8
+ bswap r12
+ ror r13,23
+ mov r15,rbx
+
+ xor r13,rax
+ ror r14,5
+ xor r15,rcx
+
+ mov QWORD PTR[32+rsp],r12
+ xor r14,r8
+ and r15,rax
+
+ ror r13,4
+ add r12,rdx
+ xor r15,rcx
+
+ ror r14,6
+ xor r13,rax
+ add r12,r15
+
+ mov r15,r8
+ add r12,QWORD PTR[rbp]
+ xor r14,r8
+
+ xor r15,r9
+ ror r13,14
+ mov rdx,r9
+
+ and rdi,r15
+ ror r14,28
+ add r12,r13
+
+ xor rdx,rdi
+ add r11,r12
+ add rdx,r12
+
+ lea rbp,QWORD PTR[8+rbp]
+ add rdx,r14
+ mov r12,QWORD PTR[40+rsi]
+ mov r13,r11
+ mov r14,rdx
+ bswap r12
+ ror r13,23
+ mov rdi,rax
+
+ xor r13,r11
+ ror r14,5
+ xor rdi,rbx
+
+ mov QWORD PTR[40+rsp],r12
+ xor r14,rdx
+ and rdi,r11
+
+ ror r13,4
+ add r12,rcx
+ xor rdi,rbx
+
+ ror r14,6
+ xor r13,r11
+ add r12,rdi
+
+ mov rdi,rdx
+ add r12,QWORD PTR[rbp]
+ xor r14,rdx
+
+ xor rdi,r8
+ ror r13,14
+ mov rcx,r8
+
+ and r15,rdi
+ ror r14,28
+ add r12,r13
+
+ xor rcx,r15
+ add r10,r12
+ add rcx,r12
+
+ lea rbp,QWORD PTR[24+rbp]
+ add rcx,r14
+ mov r12,QWORD PTR[48+rsi]
+ mov r13,r10
+ mov r14,rcx
+ bswap r12
+ ror r13,23
+ mov r15,r11
+
+ xor r13,r10
+ ror r14,5
+ xor r15,rax
+
+ mov QWORD PTR[48+rsp],r12
+ xor r14,rcx
+ and r15,r10
+
+ ror r13,4
+ add r12,rbx
+ xor r15,rax
+
+ ror r14,6
+ xor r13,r10
+ add r12,r15
+
+ mov r15,rcx
+ add r12,QWORD PTR[rbp]
+ xor r14,rcx
+
+ xor r15,rdx
+ ror r13,14
+ mov rbx,rdx
+
+ and rdi,r15
+ ror r14,28
+ add r12,r13
+
+ xor rbx,rdi
+ add r9,r12
+ add rbx,r12
+
+ lea rbp,QWORD PTR[8+rbp]
+ add rbx,r14
+ mov r12,QWORD PTR[56+rsi]
+ mov r13,r9
+ mov r14,rbx
+ bswap r12
+ ror r13,23
+ mov rdi,r10
+
+ xor r13,r9
+ ror r14,5
+ xor rdi,r11
+
+ mov QWORD PTR[56+rsp],r12
+ xor r14,rbx
+ and rdi,r9
+
+ ror r13,4
+ add r12,rax
+ xor rdi,r11
+
+ ror r14,6
+ xor r13,r9
+ add r12,rdi
+
+ mov rdi,rbx
+ add r12,QWORD PTR[rbp]
+ xor r14,rbx
+
+ xor rdi,rcx
+ ror r13,14
+ mov rax,rcx
+
+ and r15,rdi
+ ror r14,28
+ add r12,r13
+
+ xor rax,r15
+ add r8,r12
+ add rax,r12
+
+ lea rbp,QWORD PTR[24+rbp]
+ add rax,r14
+ mov r12,QWORD PTR[64+rsi]
+ mov r13,r8
+ mov r14,rax
+ bswap r12
+ ror r13,23
+ mov r15,r9
+
+ xor r13,r8
+ ror r14,5
+ xor r15,r10
+
+ mov QWORD PTR[64+rsp],r12
+ xor r14,rax
+ and r15,r8
+
+ ror r13,4
+ add r12,r11
+ xor r15,r10
+
+ ror r14,6
+ xor r13,r8
+ add r12,r15
+
+ mov r15,rax
+ add r12,QWORD PTR[rbp]
+ xor r14,rax
+
+ xor r15,rbx
+ ror r13,14
+ mov r11,rbx
+
+ and rdi,r15
+ ror r14,28
+ add r12,r13
+
+ xor r11,rdi
+ add rdx,r12
+ add r11,r12
+
+ lea rbp,QWORD PTR[8+rbp]
+ add r11,r14
+ mov r12,QWORD PTR[72+rsi]
+ mov r13,rdx
+ mov r14,r11
+ bswap r12
+ ror r13,23
+ mov rdi,r8
+
+ xor r13,rdx
+ ror r14,5
+ xor rdi,r9
+
+ mov QWORD PTR[72+rsp],r12
+ xor r14,r11
+ and rdi,rdx
+
+ ror r13,4
+ add r12,r10
+ xor rdi,r9
+
+ ror r14,6
+ xor r13,rdx
+ add r12,rdi
+
+ mov rdi,r11
+ add r12,QWORD PTR[rbp]
+ xor r14,r11
+
+ xor rdi,rax
+ ror r13,14
+ mov r10,rax
+
+ and r15,rdi
+ ror r14,28
+ add r12,r13
+
+ xor r10,r15
+ add rcx,r12
+ add r10,r12
+
+ lea rbp,QWORD PTR[24+rbp]
+ add r10,r14
+ mov r12,QWORD PTR[80+rsi]
+ mov r13,rcx
+ mov r14,r10
+ bswap r12
+ ror r13,23
+ mov r15,rdx
+
+ xor r13,rcx
+ ror r14,5
+ xor r15,r8
+
+ mov QWORD PTR[80+rsp],r12
+ xor r14,r10
+ and r15,rcx
+
+ ror r13,4
+ add r12,r9
+ xor r15,r8
+
+ ror r14,6
+ xor r13,rcx
+ add r12,r15
+
+ mov r15,r10
+ add r12,QWORD PTR[rbp]
+ xor r14,r10
+
+ xor r15,r11
+ ror r13,14
+ mov r9,r11
+
+ and rdi,r15
+ ror r14,28
+ add r12,r13
+
+ xor r9,rdi
+ add rbx,r12
+ add r9,r12
+
+ lea rbp,QWORD PTR[8+rbp]
+ add r9,r14
+ mov r12,QWORD PTR[88+rsi]
+ mov r13,rbx
+ mov r14,r9
+ bswap r12
+ ror r13,23
+ mov rdi,rcx
+
+ xor r13,rbx
+ ror r14,5
+ xor rdi,rdx
+
+ mov QWORD PTR[88+rsp],r12
+ xor r14,r9
+ and rdi,rbx
+
+ ror r13,4
+ add r12,r8
+ xor rdi,rdx
+
+ ror r14,6
+ xor r13,rbx
+ add r12,rdi
+
+ mov rdi,r9
+ add r12,QWORD PTR[rbp]
+ xor r14,r9
+
+ xor rdi,r10
+ ror r13,14
+ mov r8,r10
+
+ and r15,rdi
+ ror r14,28
+ add r12,r13
+
+ xor r8,r15
+ add rax,r12
+ add r8,r12
+
+ lea rbp,QWORD PTR[24+rbp]
+ add r8,r14
+ mov r12,QWORD PTR[96+rsi]
+ mov r13,rax
+ mov r14,r8
+ bswap r12
+ ror r13,23
+ mov r15,rbx
+
+ xor r13,rax
+ ror r14,5
+ xor r15,rcx
+
+ mov QWORD PTR[96+rsp],r12
+ xor r14,r8
+ and r15,rax
+
+ ror r13,4
+ add r12,rdx
+ xor r15,rcx
+
+ ror r14,6
+ xor r13,rax
+ add r12,r15
+
+ mov r15,r8
+ add r12,QWORD PTR[rbp]
+ xor r14,r8
+
+ xor r15,r9
+ ror r13,14
+ mov rdx,r9
+
+ and rdi,r15
+ ror r14,28
+ add r12,r13
+
+ xor rdx,rdi
+ add r11,r12
+ add rdx,r12
+
+ lea rbp,QWORD PTR[8+rbp]
+ add rdx,r14
+ mov r12,QWORD PTR[104+rsi]
+ mov r13,r11
+ mov r14,rdx
+ bswap r12
+ ror r13,23
+ mov rdi,rax
+
+ xor r13,r11
+ ror r14,5
+ xor rdi,rbx
+
+ mov QWORD PTR[104+rsp],r12
+ xor r14,rdx
+ and rdi,r11
+
+ ror r13,4
+ add r12,rcx
+ xor rdi,rbx
+
+ ror r14,6
+ xor r13,r11
+ add r12,rdi
+
+ mov rdi,rdx
+ add r12,QWORD PTR[rbp]
+ xor r14,rdx
+
+ xor rdi,r8
+ ror r13,14
+ mov rcx,r8
+
+ and r15,rdi
+ ror r14,28
+ add r12,r13
+
+ xor rcx,r15
+ add r10,r12
+ add rcx,r12
+
+ lea rbp,QWORD PTR[24+rbp]
+ add rcx,r14
+ mov r12,QWORD PTR[112+rsi]
+ mov r13,r10
+ mov r14,rcx
+ bswap r12
+ ror r13,23
+ mov r15,r11
+
+ xor r13,r10
+ ror r14,5
+ xor r15,rax
+
+ mov QWORD PTR[112+rsp],r12
+ xor r14,rcx
+ and r15,r10
+
+ ror r13,4
+ add r12,rbx
+ xor r15,rax
+
+ ror r14,6
+ xor r13,r10
+ add r12,r15
+
+ mov r15,rcx
+ add r12,QWORD PTR[rbp]
+ xor r14,rcx
+
+ xor r15,rdx
+ ror r13,14
+ mov rbx,rdx
+
+ and rdi,r15
+ ror r14,28
+ add r12,r13
+
+ xor rbx,rdi
+ add r9,r12
+ add rbx,r12
+
+ lea rbp,QWORD PTR[8+rbp]
+ add rbx,r14
+ mov r12,QWORD PTR[120+rsi]
+ mov r13,r9
+ mov r14,rbx
+ bswap r12
+ ror r13,23
+ mov rdi,r10
+
+ xor r13,r9
+ ror r14,5
+ xor rdi,r11
+
+ mov QWORD PTR[120+rsp],r12
+ xor r14,rbx
+ and rdi,r9
+
+ ror r13,4
+ add r12,rax
+ xor rdi,r11
+
+ ror r14,6
+ xor r13,r9
+ add r12,rdi
+
+ mov rdi,rbx
+ add r12,QWORD PTR[rbp]
+ xor r14,rbx
+
+ xor rdi,rcx
+ ror r13,14
+ mov rax,rcx
+
+ and r15,rdi
+ ror r14,28
+ add r12,r13
+
+ xor rax,r15
+ add r8,r12
+ add rax,r12
+
+ lea rbp,QWORD PTR[24+rbp]
+ jmp $L$rounds_16_xx
+ALIGN 16
+$L$rounds_16_xx::
+ mov r13,QWORD PTR[8+rsp]
+ mov r15,QWORD PTR[112+rsp]
+
+ mov r12,r13
+ ror r13,7
+ add rax,r14
+ mov r14,r15
+ ror r15,42
+
+ xor r13,r12
+ shr r12,7
+ ror r13,1
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ xor r12,r13
+ xor r15,r14
+ add r12,QWORD PTR[72+rsp]
+
+ add r12,QWORD PTR[rsp]
+ mov r13,r8
+ add r12,r15
+ mov r14,rax
+ ror r13,23
+ mov r15,r9
+
+ xor r13,r8
+ ror r14,5
+ xor r15,r10
+
+ mov QWORD PTR[rsp],r12
+ xor r14,rax
+ and r15,r8
+
+ ror r13,4
+ add r12,r11
+ xor r15,r10
+
+ ror r14,6
+ xor r13,r8
+ add r12,r15
+
+ mov r15,rax
+ add r12,QWORD PTR[rbp]
+ xor r14,rax
+
+ xor r15,rbx
+ ror r13,14
+ mov r11,rbx
+
+ and rdi,r15
+ ror r14,28
+ add r12,r13
+
+ xor r11,rdi
+ add rdx,r12
+ add r11,r12
+
+ lea rbp,QWORD PTR[8+rbp]
+ mov r13,QWORD PTR[16+rsp]
+ mov rdi,QWORD PTR[120+rsp]
+
+ mov r12,r13
+ ror r13,7
+ add r11,r14
+ mov r14,rdi
+ ror rdi,42
+
+ xor r13,r12
+ shr r12,7
+ ror r13,1
+ xor rdi,r14
+ shr r14,6
+
+ ror rdi,19
+ xor r12,r13
+ xor rdi,r14
+ add r12,QWORD PTR[80+rsp]
+
+ add r12,QWORD PTR[8+rsp]
+ mov r13,rdx
+ add r12,rdi
+ mov r14,r11
+ ror r13,23
+ mov rdi,r8
+
+ xor r13,rdx
+ ror r14,5
+ xor rdi,r9
+
+ mov QWORD PTR[8+rsp],r12
+ xor r14,r11
+ and rdi,rdx
+
+ ror r13,4
+ add r12,r10
+ xor rdi,r9
+
+ ror r14,6
+ xor r13,rdx
+ add r12,rdi
+
+ mov rdi,r11
+ add r12,QWORD PTR[rbp]
+ xor r14,r11
+
+ xor rdi,rax
+ ror r13,14
+ mov r10,rax
+
+ and r15,rdi
+ ror r14,28
+ add r12,r13
+
+ xor r10,r15
+ add rcx,r12
+ add r10,r12
+
+ lea rbp,QWORD PTR[24+rbp]
+ mov r13,QWORD PTR[24+rsp]
+ mov r15,QWORD PTR[rsp]
+
+ mov r12,r13
+ ror r13,7
+ add r10,r14
+ mov r14,r15
+ ror r15,42
+
+ xor r13,r12
+ shr r12,7
+ ror r13,1
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ xor r12,r13
+ xor r15,r14
+ add r12,QWORD PTR[88+rsp]
+
+ add r12,QWORD PTR[16+rsp]
+ mov r13,rcx
+ add r12,r15
+ mov r14,r10
+ ror r13,23
+ mov r15,rdx
+
+ xor r13,rcx
+ ror r14,5
+ xor r15,r8
+
+ mov QWORD PTR[16+rsp],r12
+ xor r14,r10
+ and r15,rcx
+
+ ror r13,4
+ add r12,r9
+ xor r15,r8
+
+ ror r14,6
+ xor r13,rcx
+ add r12,r15
+
+ mov r15,r10
+ add r12,QWORD PTR[rbp]
+ xor r14,r10
+
+ xor r15,r11
+ ror r13,14
+ mov r9,r11
+
+ and rdi,r15
+ ror r14,28
+ add r12,r13
+
+ xor r9,rdi
+ add rbx,r12
+ add r9,r12
+
+ lea rbp,QWORD PTR[8+rbp]
+ mov r13,QWORD PTR[32+rsp]
+ mov rdi,QWORD PTR[8+rsp]
+
+ mov r12,r13
+ ror r13,7
+ add r9,r14
+ mov r14,rdi
+ ror rdi,42
+
+ xor r13,r12
+ shr r12,7
+ ror r13,1
+ xor rdi,r14
+ shr r14,6
+
+ ror rdi,19
+ xor r12,r13
+ xor rdi,r14
+ add r12,QWORD PTR[96+rsp]
+
+ add r12,QWORD PTR[24+rsp]
+ mov r13,rbx
+ add r12,rdi
+ mov r14,r9
+ ror r13,23
+ mov rdi,rcx
+
+ xor r13,rbx
+ ror r14,5
+ xor rdi,rdx
+
+ mov QWORD PTR[24+rsp],r12
+ xor r14,r9
+ and rdi,rbx
+
+ ror r13,4
+ add r12,r8
+ xor rdi,rdx
+
+ ror r14,6
+ xor r13,rbx
+ add r12,rdi
+
+ mov rdi,r9
+ add r12,QWORD PTR[rbp]
+ xor r14,r9
+
+ xor rdi,r10
+ ror r13,14
+ mov r8,r10
+
+ and r15,rdi
+ ror r14,28
+ add r12,r13
+
+ xor r8,r15
+ add rax,r12
+ add r8,r12
+
+ lea rbp,QWORD PTR[24+rbp]
+ mov r13,QWORD PTR[40+rsp]
+ mov r15,QWORD PTR[16+rsp]
+
+ mov r12,r13
+ ror r13,7
+ add r8,r14
+ mov r14,r15
+ ror r15,42
+
+ xor r13,r12
+ shr r12,7
+ ror r13,1
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ xor r12,r13
+ xor r15,r14
+ add r12,QWORD PTR[104+rsp]
+
+ add r12,QWORD PTR[32+rsp]
+ mov r13,rax
+ add r12,r15
+ mov r14,r8
+ ror r13,23
+ mov r15,rbx
+
+ xor r13,rax
+ ror r14,5
+ xor r15,rcx
+
+ mov QWORD PTR[32+rsp],r12
+ xor r14,r8
+ and r15,rax
+
+ ror r13,4
+ add r12,rdx
+ xor r15,rcx
+
+ ror r14,6
+ xor r13,rax
+ add r12,r15
+
+ mov r15,r8
+ add r12,QWORD PTR[rbp]
+ xor r14,r8
+
+ xor r15,r9
+ ror r13,14
+ mov rdx,r9
+
+ and rdi,r15
+ ror r14,28
+ add r12,r13
+
+ xor rdx,rdi
+ add r11,r12
+ add rdx,r12
+
+ lea rbp,QWORD PTR[8+rbp]
+ mov r13,QWORD PTR[48+rsp]
+ mov rdi,QWORD PTR[24+rsp]
+
+ mov r12,r13
+ ror r13,7
+ add rdx,r14
+ mov r14,rdi
+ ror rdi,42
+
+ xor r13,r12
+ shr r12,7
+ ror r13,1
+ xor rdi,r14
+ shr r14,6
+
+ ror rdi,19
+ xor r12,r13
+ xor rdi,r14
+ add r12,QWORD PTR[112+rsp]
+
+ add r12,QWORD PTR[40+rsp]
+ mov r13,r11
+ add r12,rdi
+ mov r14,rdx
+ ror r13,23
+ mov rdi,rax
+
+ xor r13,r11
+ ror r14,5
+ xor rdi,rbx
+
+ mov QWORD PTR[40+rsp],r12
+ xor r14,rdx
+ and rdi,r11
+
+ ror r13,4
+ add r12,rcx
+ xor rdi,rbx
+
+ ror r14,6
+ xor r13,r11
+ add r12,rdi
+
+ mov rdi,rdx
+ add r12,QWORD PTR[rbp]
+ xor r14,rdx
+
+ xor rdi,r8
+ ror r13,14
+ mov rcx,r8
+
+ and r15,rdi
+ ror r14,28
+ add r12,r13
+
+ xor rcx,r15
+ add r10,r12
+ add rcx,r12
+
+ lea rbp,QWORD PTR[24+rbp]
+ mov r13,QWORD PTR[56+rsp]
+ mov r15,QWORD PTR[32+rsp]
+
+ mov r12,r13
+ ror r13,7
+ add rcx,r14
+ mov r14,r15
+ ror r15,42
+
+ xor r13,r12
+ shr r12,7
+ ror r13,1
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ xor r12,r13
+ xor r15,r14
+ add r12,QWORD PTR[120+rsp]
+
+ add r12,QWORD PTR[48+rsp]
+ mov r13,r10
+ add r12,r15
+ mov r14,rcx
+ ror r13,23
+ mov r15,r11
+
+ xor r13,r10
+ ror r14,5
+ xor r15,rax
+
+ mov QWORD PTR[48+rsp],r12
+ xor r14,rcx
+ and r15,r10
+
+ ror r13,4
+ add r12,rbx
+ xor r15,rax
+
+ ror r14,6
+ xor r13,r10
+ add r12,r15
+
+ mov r15,rcx
+ add r12,QWORD PTR[rbp]
+ xor r14,rcx
+
+ xor r15,rdx
+ ror r13,14
+ mov rbx,rdx
+
+ and rdi,r15
+ ror r14,28
+ add r12,r13
+
+ xor rbx,rdi
+ add r9,r12
+ add rbx,r12
+
+ lea rbp,QWORD PTR[8+rbp]
+ mov r13,QWORD PTR[64+rsp]
+ mov rdi,QWORD PTR[40+rsp]
+
+ mov r12,r13
+ ror r13,7
+ add rbx,r14
+ mov r14,rdi
+ ror rdi,42
+
+ xor r13,r12
+ shr r12,7
+ ror r13,1
+ xor rdi,r14
+ shr r14,6
+
+ ror rdi,19
+ xor r12,r13
+ xor rdi,r14
+ add r12,QWORD PTR[rsp]
+
+ add r12,QWORD PTR[56+rsp]
+ mov r13,r9
+ add r12,rdi
+ mov r14,rbx
+ ror r13,23
+ mov rdi,r10
+
+ xor r13,r9
+ ror r14,5
+ xor rdi,r11
+
+ mov QWORD PTR[56+rsp],r12
+ xor r14,rbx
+ and rdi,r9
+
+ ror r13,4
+ add r12,rax
+ xor rdi,r11
+
+ ror r14,6
+ xor r13,r9
+ add r12,rdi
+
+ mov rdi,rbx
+ add r12,QWORD PTR[rbp]
+ xor r14,rbx
+
+ xor rdi,rcx
+ ror r13,14
+ mov rax,rcx
+
+ and r15,rdi
+ ror r14,28
+ add r12,r13
+
+ xor rax,r15
+ add r8,r12
+ add rax,r12
+
+ lea rbp,QWORD PTR[24+rbp]
+ mov r13,QWORD PTR[72+rsp]
+ mov r15,QWORD PTR[48+rsp]
+
+ mov r12,r13
+ ror r13,7
+ add rax,r14
+ mov r14,r15
+ ror r15,42
+
+ xor r13,r12
+ shr r12,7
+ ror r13,1
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ xor r12,r13
+ xor r15,r14
+ add r12,QWORD PTR[8+rsp]
+
+ add r12,QWORD PTR[64+rsp]
+ mov r13,r8
+ add r12,r15
+ mov r14,rax
+ ror r13,23
+ mov r15,r9
+
+ xor r13,r8
+ ror r14,5
+ xor r15,r10
+
+ mov QWORD PTR[64+rsp],r12
+ xor r14,rax
+ and r15,r8
+
+ ror r13,4
+ add r12,r11
+ xor r15,r10
+
+ ror r14,6
+ xor r13,r8
+ add r12,r15
+
+ mov r15,rax
+ add r12,QWORD PTR[rbp]
+ xor r14,rax
+
+ xor r15,rbx
+ ror r13,14
+ mov r11,rbx
+
+ and rdi,r15
+ ror r14,28
+ add r12,r13
+
+ xor r11,rdi
+ add rdx,r12
+ add r11,r12
+
+ lea rbp,QWORD PTR[8+rbp]
+ mov r13,QWORD PTR[80+rsp]
+ mov rdi,QWORD PTR[56+rsp]
+
+ mov r12,r13
+ ror r13,7
+ add r11,r14
+ mov r14,rdi
+ ror rdi,42
+
+ xor r13,r12
+ shr r12,7
+ ror r13,1
+ xor rdi,r14
+ shr r14,6
+
+ ror rdi,19
+ xor r12,r13
+ xor rdi,r14
+ add r12,QWORD PTR[16+rsp]
+
+ add r12,QWORD PTR[72+rsp]
+ mov r13,rdx
+ add r12,rdi
+ mov r14,r11
+ ror r13,23
+ mov rdi,r8
+
+ xor r13,rdx
+ ror r14,5
+ xor rdi,r9
+
+ mov QWORD PTR[72+rsp],r12
+ xor r14,r11
+ and rdi,rdx
+
+ ror r13,4
+ add r12,r10
+ xor rdi,r9
+
+ ror r14,6
+ xor r13,rdx
+ add r12,rdi
+
+ mov rdi,r11
+ add r12,QWORD PTR[rbp]
+ xor r14,r11
+
+ xor rdi,rax
+ ror r13,14
+ mov r10,rax
+
+ and r15,rdi
+ ror r14,28
+ add r12,r13
+
+ xor r10,r15
+ add rcx,r12
+ add r10,r12
+
+ lea rbp,QWORD PTR[24+rbp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r15,QWORD PTR[64+rsp]
+
+ mov r12,r13
+ ror r13,7
+ add r10,r14
+ mov r14,r15
+ ror r15,42
+
+ xor r13,r12
+ shr r12,7
+ ror r13,1
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ xor r12,r13
+ xor r15,r14
+ add r12,QWORD PTR[24+rsp]
+
+ add r12,QWORD PTR[80+rsp]
+ mov r13,rcx
+ add r12,r15
+ mov r14,r10
+ ror r13,23
+ mov r15,rdx
+
+ xor r13,rcx
+ ror r14,5
+ xor r15,r8
+
+ mov QWORD PTR[80+rsp],r12
+ xor r14,r10
+ and r15,rcx
+
+ ror r13,4
+ add r12,r9
+ xor r15,r8
+
+ ror r14,6
+ xor r13,rcx
+ add r12,r15
+
+ mov r15,r10
+ add r12,QWORD PTR[rbp]
+ xor r14,r10
+
+ xor r15,r11
+ ror r13,14
+ mov r9,r11
+
+ and rdi,r15
+ ror r14,28
+ add r12,r13
+
+ xor r9,rdi
+ add rbx,r12
+ add r9,r12
+
+ lea rbp,QWORD PTR[8+rbp]
+ mov r13,QWORD PTR[96+rsp]
+ mov rdi,QWORD PTR[72+rsp]
+
+ mov r12,r13
+ ror r13,7
+ add r9,r14
+ mov r14,rdi
+ ror rdi,42
+
+ xor r13,r12
+ shr r12,7
+ ror r13,1
+ xor rdi,r14
+ shr r14,6
+
+ ror rdi,19
+ xor r12,r13
+ xor rdi,r14
+ add r12,QWORD PTR[32+rsp]
+
+ add r12,QWORD PTR[88+rsp]
+ mov r13,rbx
+ add r12,rdi
+ mov r14,r9
+ ror r13,23
+ mov rdi,rcx
+
+ xor r13,rbx
+ ror r14,5
+ xor rdi,rdx
+
+ mov QWORD PTR[88+rsp],r12
+ xor r14,r9
+ and rdi,rbx
+
+ ror r13,4
+ add r12,r8
+ xor rdi,rdx
+
+ ror r14,6
+ xor r13,rbx
+ add r12,rdi
+
+ mov rdi,r9
+ add r12,QWORD PTR[rbp]
+ xor r14,r9
+
+ xor rdi,r10
+ ror r13,14
+ mov r8,r10
+
+ and r15,rdi
+ ror r14,28
+ add r12,r13
+
+ xor r8,r15
+ add rax,r12
+ add r8,r12
+
+ lea rbp,QWORD PTR[24+rbp]
+ mov r13,QWORD PTR[104+rsp]
+ mov r15,QWORD PTR[80+rsp]
+
+ mov r12,r13
+ ror r13,7
+ add r8,r14
+ mov r14,r15
+ ror r15,42
+
+ xor r13,r12
+ shr r12,7
+ ror r13,1
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ xor r12,r13
+ xor r15,r14
+ add r12,QWORD PTR[40+rsp]
+
+ add r12,QWORD PTR[96+rsp]
+ mov r13,rax
+ add r12,r15
+ mov r14,r8
+ ror r13,23
+ mov r15,rbx
+
+ xor r13,rax
+ ror r14,5
+ xor r15,rcx
+
+ mov QWORD PTR[96+rsp],r12
+ xor r14,r8
+ and r15,rax
+
+ ror r13,4
+ add r12,rdx
+ xor r15,rcx
+
+ ror r14,6
+ xor r13,rax
+ add r12,r15
+
+ mov r15,r8
+ add r12,QWORD PTR[rbp]
+ xor r14,r8
+
+ xor r15,r9
+ ror r13,14
+ mov rdx,r9
+
+ and rdi,r15
+ ror r14,28
+ add r12,r13
+
+ xor rdx,rdi
+ add r11,r12
+ add rdx,r12
+
+ lea rbp,QWORD PTR[8+rbp]
+ mov r13,QWORD PTR[112+rsp]
+ mov rdi,QWORD PTR[88+rsp]
+
+ mov r12,r13
+ ror r13,7
+ add rdx,r14
+ mov r14,rdi
+ ror rdi,42
+
+ xor r13,r12
+ shr r12,7
+ ror r13,1
+ xor rdi,r14
+ shr r14,6
+
+ ror rdi,19
+ xor r12,r13
+ xor rdi,r14
+ add r12,QWORD PTR[48+rsp]
+
+ add r12,QWORD PTR[104+rsp]
+ mov r13,r11
+ add r12,rdi
+ mov r14,rdx
+ ror r13,23
+ mov rdi,rax
+
+ xor r13,r11
+ ror r14,5
+ xor rdi,rbx
+
+ mov QWORD PTR[104+rsp],r12
+ xor r14,rdx
+ and rdi,r11
+
+ ror r13,4
+ add r12,rcx
+ xor rdi,rbx
+
+ ror r14,6
+ xor r13,r11
+ add r12,rdi
+
+ mov rdi,rdx
+ add r12,QWORD PTR[rbp]
+ xor r14,rdx
+
+ xor rdi,r8
+ ror r13,14
+ mov rcx,r8
+
+ and r15,rdi
+ ror r14,28
+ add r12,r13
+
+ xor rcx,r15
+ add r10,r12
+ add rcx,r12
+
+ lea rbp,QWORD PTR[24+rbp]
+ mov r13,QWORD PTR[120+rsp]
+ mov r15,QWORD PTR[96+rsp]
+
+ mov r12,r13
+ ror r13,7
+ add rcx,r14
+ mov r14,r15
+ ror r15,42
+
+ xor r13,r12
+ shr r12,7
+ ror r13,1
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ xor r12,r13
+ xor r15,r14
+ add r12,QWORD PTR[56+rsp]
+
+ add r12,QWORD PTR[112+rsp]
+ mov r13,r10
+ add r12,r15
+ mov r14,rcx
+ ror r13,23
+ mov r15,r11
+
+ xor r13,r10
+ ror r14,5
+ xor r15,rax
+
+ mov QWORD PTR[112+rsp],r12
+ xor r14,rcx
+ and r15,r10
+
+ ror r13,4
+ add r12,rbx
+ xor r15,rax
+
+ ror r14,6
+ xor r13,r10
+ add r12,r15
+
+ mov r15,rcx
+ add r12,QWORD PTR[rbp]
+ xor r14,rcx
+
+ xor r15,rdx
+ ror r13,14
+ mov rbx,rdx
+
+ and rdi,r15
+ ror r14,28
+ add r12,r13
+
+ xor rbx,rdi
+ add r9,r12
+ add rbx,r12
+
+ lea rbp,QWORD PTR[8+rbp]
+ mov r13,QWORD PTR[rsp]
+ mov rdi,QWORD PTR[104+rsp]
+
+ mov r12,r13
+ ror r13,7
+ add rbx,r14
+ mov r14,rdi
+ ror rdi,42
+
+ xor r13,r12
+ shr r12,7
+ ror r13,1
+ xor rdi,r14
+ shr r14,6
+
+ ror rdi,19
+ xor r12,r13
+ xor rdi,r14
+ add r12,QWORD PTR[64+rsp]
+
+ add r12,QWORD PTR[120+rsp]
+ mov r13,r9
+ add r12,rdi
+ mov r14,rbx
+ ror r13,23
+ mov rdi,r10
+
+ xor r13,r9
+ ror r14,5
+ xor rdi,r11
+
+ mov QWORD PTR[120+rsp],r12
+ xor r14,rbx
+ and rdi,r9
+
+ ror r13,4
+ add r12,rax
+ xor rdi,r11
+
+ ror r14,6
+ xor r13,r9
+ add r12,rdi
+
+ mov rdi,rbx
+ add r12,QWORD PTR[rbp]
+ xor r14,rbx
+
+ xor rdi,rcx
+ ror r13,14
+ mov rax,rcx
+
+ and r15,rdi
+ ror r14,28
+ add r12,r13
+
+ xor rax,r15
+ add r8,r12
+ add rax,r12
+
+ lea rbp,QWORD PTR[24+rbp]
+ cmp BYTE PTR[7+rbp],0
+ jnz $L$rounds_16_xx
+
+ mov rdi,QWORD PTR[((128+0))+rsp]
+ add rax,r14
+ lea rsi,QWORD PTR[128+rsi]
+
+ add rax,QWORD PTR[rdi]
+ add rbx,QWORD PTR[8+rdi]
+ add rcx,QWORD PTR[16+rdi]
+ add rdx,QWORD PTR[24+rdi]
+ add r8,QWORD PTR[32+rdi]
+ add r9,QWORD PTR[40+rdi]
+ add r10,QWORD PTR[48+rdi]
+ add r11,QWORD PTR[56+rdi]
+
+ cmp rsi,QWORD PTR[((128+16))+rsp]
+
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rbx
+ mov QWORD PTR[16+rdi],rcx
+ mov QWORD PTR[24+rdi],rdx
+ mov QWORD PTR[32+rdi],r8
+ mov QWORD PTR[40+rdi],r9
+ mov QWORD PTR[48+rdi],r10
+ mov QWORD PTR[56+rdi],r11
+ jb $L$loop
+
+ mov rsi,QWORD PTR[((128+24))+rsp]
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_sha512_block_data_order::
+sha512_block_data_order ENDP
+ALIGN 64
+
+K512::
+ DQ 0428a2f98d728ae22h,07137449123ef65cdh
+ DQ 0428a2f98d728ae22h,07137449123ef65cdh
+ DQ 0b5c0fbcfec4d3b2fh,0e9b5dba58189dbbch
+ DQ 0b5c0fbcfec4d3b2fh,0e9b5dba58189dbbch
+ DQ 03956c25bf348b538h,059f111f1b605d019h
+ DQ 03956c25bf348b538h,059f111f1b605d019h
+ DQ 0923f82a4af194f9bh,0ab1c5ed5da6d8118h
+ DQ 0923f82a4af194f9bh,0ab1c5ed5da6d8118h
+ DQ 0d807aa98a3030242h,012835b0145706fbeh
+ DQ 0d807aa98a3030242h,012835b0145706fbeh
+ DQ 0243185be4ee4b28ch,0550c7dc3d5ffb4e2h
+ DQ 0243185be4ee4b28ch,0550c7dc3d5ffb4e2h
+ DQ 072be5d74f27b896fh,080deb1fe3b1696b1h
+ DQ 072be5d74f27b896fh,080deb1fe3b1696b1h
+ DQ 09bdc06a725c71235h,0c19bf174cf692694h
+ DQ 09bdc06a725c71235h,0c19bf174cf692694h
+ DQ 0e49b69c19ef14ad2h,0efbe4786384f25e3h
+ DQ 0e49b69c19ef14ad2h,0efbe4786384f25e3h
+ DQ 00fc19dc68b8cd5b5h,0240ca1cc77ac9c65h
+ DQ 00fc19dc68b8cd5b5h,0240ca1cc77ac9c65h
+ DQ 02de92c6f592b0275h,04a7484aa6ea6e483h
+ DQ 02de92c6f592b0275h,04a7484aa6ea6e483h
+ DQ 05cb0a9dcbd41fbd4h,076f988da831153b5h
+ DQ 05cb0a9dcbd41fbd4h,076f988da831153b5h
+ DQ 0983e5152ee66dfabh,0a831c66d2db43210h
+ DQ 0983e5152ee66dfabh,0a831c66d2db43210h
+ DQ 0b00327c898fb213fh,0bf597fc7beef0ee4h
+ DQ 0b00327c898fb213fh,0bf597fc7beef0ee4h
+ DQ 0c6e00bf33da88fc2h,0d5a79147930aa725h
+ DQ 0c6e00bf33da88fc2h,0d5a79147930aa725h
+ DQ 006ca6351e003826fh,0142929670a0e6e70h
+ DQ 006ca6351e003826fh,0142929670a0e6e70h
+ DQ 027b70a8546d22ffch,02e1b21385c26c926h
+ DQ 027b70a8546d22ffch,02e1b21385c26c926h
+ DQ 04d2c6dfc5ac42aedh,053380d139d95b3dfh
+ DQ 04d2c6dfc5ac42aedh,053380d139d95b3dfh
+ DQ 0650a73548baf63deh,0766a0abb3c77b2a8h
+ DQ 0650a73548baf63deh,0766a0abb3c77b2a8h
+ DQ 081c2c92e47edaee6h,092722c851482353bh
+ DQ 081c2c92e47edaee6h,092722c851482353bh
+ DQ 0a2bfe8a14cf10364h,0a81a664bbc423001h
+ DQ 0a2bfe8a14cf10364h,0a81a664bbc423001h
+ DQ 0c24b8b70d0f89791h,0c76c51a30654be30h
+ DQ 0c24b8b70d0f89791h,0c76c51a30654be30h
+ DQ 0d192e819d6ef5218h,0d69906245565a910h
+ DQ 0d192e819d6ef5218h,0d69906245565a910h
+ DQ 0f40e35855771202ah,0106aa07032bbd1b8h
+ DQ 0f40e35855771202ah,0106aa07032bbd1b8h
+ DQ 019a4c116b8d2d0c8h,01e376c085141ab53h
+ DQ 019a4c116b8d2d0c8h,01e376c085141ab53h
+ DQ 02748774cdf8eeb99h,034b0bcb5e19b48a8h
+ DQ 02748774cdf8eeb99h,034b0bcb5e19b48a8h
+ DQ 0391c0cb3c5c95a63h,04ed8aa4ae3418acbh
+ DQ 0391c0cb3c5c95a63h,04ed8aa4ae3418acbh
+ DQ 05b9cca4f7763e373h,0682e6ff3d6b2b8a3h
+ DQ 05b9cca4f7763e373h,0682e6ff3d6b2b8a3h
+ DQ 0748f82ee5defb2fch,078a5636f43172f60h
+ DQ 0748f82ee5defb2fch,078a5636f43172f60h
+ DQ 084c87814a1f0ab72h,08cc702081a6439ech
+ DQ 084c87814a1f0ab72h,08cc702081a6439ech
+ DQ 090befffa23631e28h,0a4506cebde82bde9h
+ DQ 090befffa23631e28h,0a4506cebde82bde9h
+ DQ 0bef9a3f7b2c67915h,0c67178f2e372532bh
+ DQ 0bef9a3f7b2c67915h,0c67178f2e372532bh
+ DQ 0ca273eceea26619ch,0d186b8c721c0c207h
+ DQ 0ca273eceea26619ch,0d186b8c721c0c207h
+ DQ 0eada7dd6cde0eb1eh,0f57d4f7fee6ed178h
+ DQ 0eada7dd6cde0eb1eh,0f57d4f7fee6ed178h
+ DQ 006f067aa72176fbah,00a637dc5a2c898a6h
+ DQ 006f067aa72176fbah,00a637dc5a2c898a6h
+ DQ 0113f9804bef90daeh,01b710b35131c471bh
+ DQ 0113f9804bef90daeh,01b710b35131c471bh
+ DQ 028db77f523047d84h,032caab7b40c72493h
+ DQ 028db77f523047d84h,032caab7b40c72493h
+ DQ 03c9ebe0a15c9bebch,0431d67c49c100d4ch
+ DQ 03c9ebe0a15c9bebch,0431d67c49c100d4ch
+ DQ 04cc5d4becb3e42b6h,0597f299cfc657e2ah
+ DQ 04cc5d4becb3e42b6h,0597f299cfc657e2ah
+ DQ 05fcb6fab3ad6faech,06c44198c4a475817h
+ DQ 05fcb6fab3ad6faech,06c44198c4a475817h
+
+ DQ 00001020304050607h,008090a0b0c0d0e0fh
+ DQ 00001020304050607h,008090a0b0c0d0e0fh
+DB 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
+DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54
+DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
+DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
+DB 111,114,103,62,0
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$in_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_prologue
+ mov rsi,rax
+ mov rax,QWORD PTR[((128+24))+rax]
+ lea rax,QWORD PTR[48+rax]
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+ lea r10,QWORD PTR[$L$epilogue]
+ cmp rbx,r10
+ jb $L$in_prologue
+
+ lea rsi,QWORD PTR[((128+32))+rsi]
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,12
+ DD 0a548f3fch
+
+$L$in_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+se_handler ENDP
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_sha512_block_data_order
+ DD imagerel $L$SEH_end_sha512_block_data_order
+ DD imagerel $L$SEH_info_sha512_block_data_order
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_sha512_block_data_order::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$prologue,imagerel $L$epilogue
+
+.xdata ENDS
+END