diff options
author | Adam Langley <agl@google.com> | 2015-01-22 14:27:53 -0800 |
---|---|---|
committer | Adam Langley <agl@google.com> | 2015-01-30 16:52:14 -0800 |
commit | d9e397b599b13d642138480a28c14db7a136bf05 (patch) | |
tree | 34bab61dc4ce323b123ad4614dbc07e86ea2f9ef /win-x86_64 | |
download | external_boringssl-d9e397b599b13d642138480a28c14db7a136bf05.zip external_boringssl-d9e397b599b13d642138480a28c14db7a136bf05.tar.gz external_boringssl-d9e397b599b13d642138480a28c14db7a136bf05.tar.bz2 |
Initial commit of BoringSSL for Android.
Diffstat (limited to 'win-x86_64')
-rw-r--r-- | win-x86_64/crypto/aes/aes-x86_64.asm | 2864 | ||||
-rw-r--r-- | win-x86_64/crypto/aes/aesni-x86_64.asm | 3631 | ||||
-rw-r--r-- | win-x86_64/crypto/aes/bsaes-x86_64.asm | 2734 | ||||
-rw-r--r-- | win-x86_64/crypto/aes/vpaes-x86_64.asm | 1143 | ||||
-rw-r--r-- | win-x86_64/crypto/bn/modexp512-x86_64.asm | 1887 | ||||
-rw-r--r-- | win-x86_64/crypto/bn/rsaz-avx2.asm | 29 | ||||
-rw-r--r-- | win-x86_64/crypto/bn/rsaz-x86_64.asm | 1326 | ||||
-rw-r--r-- | win-x86_64/crypto/bn/x86_64-mont.asm | 945 | ||||
-rw-r--r-- | win-x86_64/crypto/bn/x86_64-mont5.asm | 2061 | ||||
-rw-r--r-- | win-x86_64/crypto/cpu-x86_64-asm.asm | 158 | ||||
-rw-r--r-- | win-x86_64/crypto/md5/md5-x86_64.asm | 778 | ||||
-rw-r--r-- | win-x86_64/crypto/modes/aesni-gcm-x86_64.asm | 19 | ||||
-rw-r--r-- | win-x86_64/crypto/modes/ghash-x86_64.asm | 1510 | ||||
-rw-r--r-- | win-x86_64/crypto/rc4/rc4-md5-x86_64.asm | 1374 | ||||
-rw-r--r-- | win-x86_64/crypto/rc4/rc4-x86_64.asm | 773 | ||||
-rw-r--r-- | win-x86_64/crypto/sha/sha1-x86_64.asm | 2619 | ||||
-rw-r--r-- | win-x86_64/crypto/sha/sha256-x86_64.asm | 2997 | ||||
-rw-r--r-- | win-x86_64/crypto/sha/sha512-x86_64.asm | 1913 |
18 files changed, 28761 insertions, 0 deletions
diff --git a/win-x86_64/crypto/aes/aes-x86_64.asm b/win-x86_64/crypto/aes/aes-x86_64.asm new file mode 100644 index 0000000..96cbb4b --- /dev/null +++ b/win-x86_64/crypto/aes/aes-x86_64.asm @@ -0,0 +1,2864 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +ALIGN 16 +_x86_64_AES_encrypt PROC PRIVATE + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] + + mov r13d,DWORD PTR[240+r15] + sub r13d,1 + jmp $L$enc_loop +ALIGN 16 +$L$enc_loop:: + + movzx esi,al + movzx edi,bl + movzx ebp,cl + mov r10d,DWORD PTR[rsi*8+r14] + mov r11d,DWORD PTR[rdi*8+r14] + mov r12d,DWORD PTR[rbp*8+r14] + + movzx esi,bh + movzx edi,ch + movzx ebp,dl + xor r10d,DWORD PTR[3+rsi*8+r14] + xor r11d,DWORD PTR[3+rdi*8+r14] + mov r8d,DWORD PTR[rbp*8+r14] + + movzx esi,dh + shr ecx,16 + movzx ebp,ah + xor r12d,DWORD PTR[3+rsi*8+r14] + shr edx,16 + xor r8d,DWORD PTR[3+rbp*8+r14] + + shr ebx,16 + lea r15,QWORD PTR[16+r15] + shr eax,16 + + movzx esi,cl + movzx edi,dl + movzx ebp,al + xor r10d,DWORD PTR[2+rsi*8+r14] + xor r11d,DWORD PTR[2+rdi*8+r14] + xor r12d,DWORD PTR[2+rbp*8+r14] + + movzx esi,dh + movzx edi,ah + movzx ebp,bl + xor r10d,DWORD PTR[1+rsi*8+r14] + xor r11d,DWORD PTR[1+rdi*8+r14] + xor r8d,DWORD PTR[2+rbp*8+r14] + + mov edx,DWORD PTR[12+r15] + movzx edi,bh + movzx ebp,ch + mov eax,DWORD PTR[r15] + xor r12d,DWORD PTR[1+rdi*8+r14] + xor r8d,DWORD PTR[1+rbp*8+r14] + + mov ebx,DWORD PTR[4+r15] + mov ecx,DWORD PTR[8+r15] + xor eax,r10d + xor ebx,r11d + xor ecx,r12d + xor edx,r8d + sub r13d,1 + jnz $L$enc_loop + movzx esi,al + movzx edi,bl + movzx ebp,cl + movzx r10d,BYTE PTR[2+rsi*8+r14] + movzx r11d,BYTE PTR[2+rdi*8+r14] + movzx r12d,BYTE PTR[2+rbp*8+r14] + + movzx esi,dl + movzx edi,bh + movzx ebp,ch + movzx r8d,BYTE PTR[2+rsi*8+r14] + mov edi,DWORD PTR[rdi*8+r14] + mov ebp,DWORD PTR[rbp*8+r14] + + and edi,00000ff00h + and ebp,00000ff00h + + xor r10d,edi + xor r11d,ebp + shr ecx,16 + + movzx esi,dh + movzx edi,ah + shr edx,16 + mov 
esi,DWORD PTR[rsi*8+r14] + mov edi,DWORD PTR[rdi*8+r14] + + and esi,00000ff00h + and edi,00000ff00h + shr ebx,16 + xor r12d,esi + xor r8d,edi + shr eax,16 + + movzx esi,cl + movzx edi,dl + movzx ebp,al + mov esi,DWORD PTR[rsi*8+r14] + mov edi,DWORD PTR[rdi*8+r14] + mov ebp,DWORD PTR[rbp*8+r14] + + and esi,000ff0000h + and edi,000ff0000h + and ebp,000ff0000h + + xor r10d,esi + xor r11d,edi + xor r12d,ebp + + movzx esi,bl + movzx edi,dh + movzx ebp,ah + mov esi,DWORD PTR[rsi*8+r14] + mov edi,DWORD PTR[2+rdi*8+r14] + mov ebp,DWORD PTR[2+rbp*8+r14] + + and esi,000ff0000h + and edi,0ff000000h + and ebp,0ff000000h + + xor r8d,esi + xor r10d,edi + xor r11d,ebp + + movzx esi,bh + movzx edi,ch + mov edx,DWORD PTR[((16+12))+r15] + mov esi,DWORD PTR[2+rsi*8+r14] + mov edi,DWORD PTR[2+rdi*8+r14] + mov eax,DWORD PTR[((16+0))+r15] + + and esi,0ff000000h + and edi,0ff000000h + + xor r12d,esi + xor r8d,edi + + mov ebx,DWORD PTR[((16+4))+r15] + mov ecx,DWORD PTR[((16+8))+r15] + xor eax,r10d + xor ebx,r11d + xor ecx,r12d + xor edx,r8d +DB 0f3h,0c3h +_x86_64_AES_encrypt ENDP + +ALIGN 16 +_x86_64_AES_encrypt_compact PROC PRIVATE + lea r8,QWORD PTR[128+r14] + mov edi,DWORD PTR[((0-128))+r8] + mov ebp,DWORD PTR[((32-128))+r8] + mov r10d,DWORD PTR[((64-128))+r8] + mov r11d,DWORD PTR[((96-128))+r8] + mov edi,DWORD PTR[((128-128))+r8] + mov ebp,DWORD PTR[((160-128))+r8] + mov r10d,DWORD PTR[((192-128))+r8] + mov r11d,DWORD PTR[((224-128))+r8] + jmp $L$enc_loop_compact +ALIGN 16 +$L$enc_loop_compact:: + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] + lea r15,QWORD PTR[16+r15] + movzx r10d,al + movzx r11d,bl + movzx r12d,cl + movzx r8d,dl + movzx esi,bh + movzx edi,ch + shr ecx,16 + movzx ebp,dh + movzx r10d,BYTE PTR[r10*1+r14] + movzx r11d,BYTE PTR[r11*1+r14] + movzx r12d,BYTE PTR[r12*1+r14] + movzx r8d,BYTE PTR[r8*1+r14] + + movzx r9d,BYTE PTR[rsi*1+r14] + movzx esi,ah + movzx r13d,BYTE PTR[rdi*1+r14] + movzx edi,cl + movzx 
ebp,BYTE PTR[rbp*1+r14] + movzx esi,BYTE PTR[rsi*1+r14] + + shl r9d,8 + shr edx,16 + shl r13d,8 + xor r10d,r9d + shr eax,16 + movzx r9d,dl + shr ebx,16 + xor r11d,r13d + shl ebp,8 + movzx r13d,al + movzx edi,BYTE PTR[rdi*1+r14] + xor r12d,ebp + + shl esi,8 + movzx ebp,bl + shl edi,16 + xor r8d,esi + movzx r9d,BYTE PTR[r9*1+r14] + movzx esi,dh + movzx r13d,BYTE PTR[r13*1+r14] + xor r10d,edi + + shr ecx,8 + movzx edi,ah + shl r9d,16 + shr ebx,8 + shl r13d,16 + xor r11d,r9d + movzx ebp,BYTE PTR[rbp*1+r14] + movzx esi,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] + movzx edx,BYTE PTR[rcx*1+r14] + movzx ecx,BYTE PTR[rbx*1+r14] + + shl ebp,16 + xor r12d,r13d + shl esi,24 + xor r8d,ebp + shl edi,24 + xor r10d,esi + shl edx,24 + xor r11d,edi + shl ecx,24 + mov eax,r10d + mov ebx,r11d + xor ecx,r12d + xor edx,r8d + cmp r15,QWORD PTR[16+rsp] + je $L$enc_compact_done + mov r10d,080808080h + mov r11d,080808080h + and r10d,eax + and r11d,ebx + mov esi,r10d + mov edi,r11d + shr r10d,7 + lea r8d,DWORD PTR[rax*1+rax] + shr r11d,7 + lea r9d,DWORD PTR[rbx*1+rbx] + sub esi,r10d + sub edi,r11d + and r8d,0fefefefeh + and r9d,0fefefefeh + and esi,01b1b1b1bh + and edi,01b1b1b1bh + mov r10d,eax + mov r11d,ebx + xor r8d,esi + xor r9d,edi + + xor eax,r8d + xor ebx,r9d + mov r12d,080808080h + rol eax,24 + mov ebp,080808080h + rol ebx,24 + and r12d,ecx + and ebp,edx + xor eax,r8d + xor ebx,r9d + mov esi,r12d + ror r10d,16 + mov edi,ebp + ror r11d,16 + lea r8d,DWORD PTR[rcx*1+rcx] + shr r12d,7 + xor eax,r10d + shr ebp,7 + xor ebx,r11d + ror r10d,8 + lea r9d,DWORD PTR[rdx*1+rdx] + ror r11d,8 + sub esi,r12d + sub edi,ebp + xor eax,r10d + xor ebx,r11d + + and r8d,0fefefefeh + and r9d,0fefefefeh + and esi,01b1b1b1bh + and edi,01b1b1b1bh + mov r12d,ecx + mov ebp,edx + xor r8d,esi + xor r9d,edi + + ror r12d,16 + xor ecx,r8d + ror ebp,16 + xor edx,r9d + rol ecx,24 + mov esi,DWORD PTR[r14] + rol edx,24 + xor ecx,r8d + mov edi,DWORD PTR[64+r14] + xor edx,r9d + mov r8d,DWORD PTR[128+r14] + xor 
ecx,r12d + ror r12d,8 + xor edx,ebp + ror ebp,8 + xor ecx,r12d + mov r9d,DWORD PTR[192+r14] + xor edx,ebp + jmp $L$enc_loop_compact +ALIGN 16 +$L$enc_compact_done:: + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] +DB 0f3h,0c3h +_x86_64_AES_encrypt_compact ENDP +ALIGN 16 +PUBLIC asm_AES_encrypt + + +asm_AES_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_asm_AES_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + + mov r10,rsp + lea rcx,QWORD PTR[((-63))+rdx] + and rsp,-64 + sub rcx,rsp + neg rcx + and rcx,03c0h + sub rsp,rcx + sub rsp,32 + + mov QWORD PTR[16+rsp],rsi + mov QWORD PTR[24+rsp],r10 +$L$enc_prologue:: + + mov r15,rdx + mov r13d,DWORD PTR[240+r15] + + mov eax,DWORD PTR[rdi] + mov ebx,DWORD PTR[4+rdi] + mov ecx,DWORD PTR[8+rdi] + mov edx,DWORD PTR[12+rdi] + + shl r13d,4 + lea rbp,QWORD PTR[r13*1+r15] + mov QWORD PTR[rsp],r15 + mov QWORD PTR[8+rsp],rbp + + + lea r14,QWORD PTR[(($L$AES_Te+2048))] + lea rbp,QWORD PTR[768+rsp] + sub rbp,r14 + and rbp,0300h + lea r14,QWORD PTR[rbp*1+r14] + + call _x86_64_AES_encrypt_compact + + mov r9,QWORD PTR[16+rsp] + mov rsi,QWORD PTR[24+rsp] + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$enc_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_asm_AES_encrypt:: +asm_AES_encrypt ENDP + +ALIGN 16 +_x86_64_AES_decrypt PROC PRIVATE + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] + + mov r13d,DWORD PTR[240+r15] + sub r13d,1 + jmp $L$dec_loop +ALIGN 16 
+$L$dec_loop:: + + movzx esi,al + movzx edi,bl + movzx ebp,cl + mov r10d,DWORD PTR[rsi*8+r14] + mov r11d,DWORD PTR[rdi*8+r14] + mov r12d,DWORD PTR[rbp*8+r14] + + movzx esi,dh + movzx edi,ah + movzx ebp,dl + xor r10d,DWORD PTR[3+rsi*8+r14] + xor r11d,DWORD PTR[3+rdi*8+r14] + mov r8d,DWORD PTR[rbp*8+r14] + + movzx esi,bh + shr eax,16 + movzx ebp,ch + xor r12d,DWORD PTR[3+rsi*8+r14] + shr edx,16 + xor r8d,DWORD PTR[3+rbp*8+r14] + + shr ebx,16 + lea r15,QWORD PTR[16+r15] + shr ecx,16 + + movzx esi,cl + movzx edi,dl + movzx ebp,al + xor r10d,DWORD PTR[2+rsi*8+r14] + xor r11d,DWORD PTR[2+rdi*8+r14] + xor r12d,DWORD PTR[2+rbp*8+r14] + + movzx esi,bh + movzx edi,ch + movzx ebp,bl + xor r10d,DWORD PTR[1+rsi*8+r14] + xor r11d,DWORD PTR[1+rdi*8+r14] + xor r8d,DWORD PTR[2+rbp*8+r14] + + movzx esi,dh + mov edx,DWORD PTR[12+r15] + movzx ebp,ah + xor r12d,DWORD PTR[1+rsi*8+r14] + mov eax,DWORD PTR[r15] + xor r8d,DWORD PTR[1+rbp*8+r14] + + xor eax,r10d + mov ebx,DWORD PTR[4+r15] + mov ecx,DWORD PTR[8+r15] + xor ecx,r12d + xor ebx,r11d + xor edx,r8d + sub r13d,1 + jnz $L$dec_loop + lea r14,QWORD PTR[2048+r14] + movzx esi,al + movzx edi,bl + movzx ebp,cl + movzx r10d,BYTE PTR[rsi*1+r14] + movzx r11d,BYTE PTR[rdi*1+r14] + movzx r12d,BYTE PTR[rbp*1+r14] + + movzx esi,dl + movzx edi,dh + movzx ebp,ah + movzx r8d,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] + movzx ebp,BYTE PTR[rbp*1+r14] + + shl edi,8 + shl ebp,8 + + xor r10d,edi + xor r11d,ebp + shr edx,16 + + movzx esi,bh + movzx edi,ch + shr eax,16 + movzx esi,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] + + shl esi,8 + shl edi,8 + shr ebx,16 + xor r12d,esi + xor r8d,edi + shr ecx,16 + + movzx esi,cl + movzx edi,dl + movzx ebp,al + movzx esi,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] + movzx ebp,BYTE PTR[rbp*1+r14] + + shl esi,16 + shl edi,16 + shl ebp,16 + + xor r10d,esi + xor r11d,edi + xor r12d,ebp + + movzx esi,bl + movzx edi,bh + movzx ebp,ch + movzx esi,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] 
+ movzx ebp,BYTE PTR[rbp*1+r14] + + shl esi,16 + shl edi,24 + shl ebp,24 + + xor r8d,esi + xor r10d,edi + xor r11d,ebp + + movzx esi,dh + movzx edi,ah + mov edx,DWORD PTR[((16+12))+r15] + movzx esi,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] + mov eax,DWORD PTR[((16+0))+r15] + + shl esi,24 + shl edi,24 + + xor r12d,esi + xor r8d,edi + + mov ebx,DWORD PTR[((16+4))+r15] + mov ecx,DWORD PTR[((16+8))+r15] + lea r14,QWORD PTR[((-2048))+r14] + xor eax,r10d + xor ebx,r11d + xor ecx,r12d + xor edx,r8d +DB 0f3h,0c3h +_x86_64_AES_decrypt ENDP + +ALIGN 16 +_x86_64_AES_decrypt_compact PROC PRIVATE + lea r8,QWORD PTR[128+r14] + mov edi,DWORD PTR[((0-128))+r8] + mov ebp,DWORD PTR[((32-128))+r8] + mov r10d,DWORD PTR[((64-128))+r8] + mov r11d,DWORD PTR[((96-128))+r8] + mov edi,DWORD PTR[((128-128))+r8] + mov ebp,DWORD PTR[((160-128))+r8] + mov r10d,DWORD PTR[((192-128))+r8] + mov r11d,DWORD PTR[((224-128))+r8] + jmp $L$dec_loop_compact + +ALIGN 16 +$L$dec_loop_compact:: + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] + lea r15,QWORD PTR[16+r15] + movzx r10d,al + movzx r11d,bl + movzx r12d,cl + movzx r8d,dl + movzx esi,dh + movzx edi,ah + shr edx,16 + movzx ebp,bh + movzx r10d,BYTE PTR[r10*1+r14] + movzx r11d,BYTE PTR[r11*1+r14] + movzx r12d,BYTE PTR[r12*1+r14] + movzx r8d,BYTE PTR[r8*1+r14] + + movzx r9d,BYTE PTR[rsi*1+r14] + movzx esi,ch + movzx r13d,BYTE PTR[rdi*1+r14] + movzx ebp,BYTE PTR[rbp*1+r14] + movzx esi,BYTE PTR[rsi*1+r14] + + shr ecx,16 + shl r13d,8 + shl r9d,8 + movzx edi,cl + shr eax,16 + xor r10d,r9d + shr ebx,16 + movzx r9d,dl + + shl ebp,8 + xor r11d,r13d + shl esi,8 + movzx r13d,al + movzx edi,BYTE PTR[rdi*1+r14] + xor r12d,ebp + movzx ebp,bl + + shl edi,16 + xor r8d,esi + movzx r9d,BYTE PTR[r9*1+r14] + movzx esi,bh + movzx ebp,BYTE PTR[rbp*1+r14] + xor r10d,edi + movzx r13d,BYTE PTR[r13*1+r14] + movzx edi,ch + + shl ebp,16 + shl r9d,16 + shl r13d,16 + xor r8d,ebp + movzx ebp,dh + xor r11d,r9d + 
shr eax,8 + xor r12d,r13d + + movzx esi,BYTE PTR[rsi*1+r14] + movzx ebx,BYTE PTR[rdi*1+r14] + movzx ecx,BYTE PTR[rbp*1+r14] + movzx edx,BYTE PTR[rax*1+r14] + + mov eax,r10d + shl esi,24 + shl ebx,24 + shl ecx,24 + xor eax,esi + shl edx,24 + xor ebx,r11d + xor ecx,r12d + xor edx,r8d + cmp r15,QWORD PTR[16+rsp] + je $L$dec_compact_done + + mov rsi,QWORD PTR[((256+0))+r14] + shl rbx,32 + shl rdx,32 + mov rdi,QWORD PTR[((256+8))+r14] + or rax,rbx + or rcx,rdx + mov rbp,QWORD PTR[((256+16))+r14] + mov r9,rsi + mov r12,rsi + and r9,rax + and r12,rcx + mov rbx,r9 + mov rdx,r12 + shr r9,7 + lea r8,QWORD PTR[rax*1+rax] + shr r12,7 + lea r11,QWORD PTR[rcx*1+rcx] + sub rbx,r9 + sub rdx,r12 + and r8,rdi + and r11,rdi + and rbx,rbp + and rdx,rbp + xor r8,rbx + xor r11,rdx + mov r10,rsi + mov r13,rsi + + and r10,r8 + and r13,r11 + mov rbx,r10 + mov rdx,r13 + shr r10,7 + lea r9,QWORD PTR[r8*1+r8] + shr r13,7 + lea r12,QWORD PTR[r11*1+r11] + sub rbx,r10 + sub rdx,r13 + and r9,rdi + and r12,rdi + and rbx,rbp + and rdx,rbp + xor r9,rbx + xor r12,rdx + mov r10,rsi + mov r13,rsi + + and r10,r9 + and r13,r12 + mov rbx,r10 + mov rdx,r13 + shr r10,7 + xor r8,rax + shr r13,7 + xor r11,rcx + sub rbx,r10 + sub rdx,r13 + lea r10,QWORD PTR[r9*1+r9] + lea r13,QWORD PTR[r12*1+r12] + xor r9,rax + xor r12,rcx + and r10,rdi + and r13,rdi + and rbx,rbp + and rdx,rbp + xor r10,rbx + xor r13,rdx + + xor rax,r10 + xor rcx,r13 + xor r8,r10 + xor r11,r13 + mov rbx,rax + mov rdx,rcx + xor r9,r10 + shr rbx,32 + xor r12,r13 + shr rdx,32 + xor r10,r8 + rol eax,8 + xor r13,r11 + rol ecx,8 + xor r10,r9 + rol ebx,8 + xor r13,r12 + + rol edx,8 + xor eax,r10d + shr r10,32 + xor ecx,r13d + shr r13,32 + xor ebx,r10d + xor edx,r13d + + mov r10,r8 + rol r8d,24 + mov r13,r11 + rol r11d,24 + shr r10,32 + xor eax,r8d + shr r13,32 + xor ecx,r11d + rol r10d,24 + mov r8,r9 + rol r13d,24 + mov r11,r12 + shr r8,32 + xor ebx,r10d + shr r11,32 + xor edx,r13d + + mov rsi,QWORD PTR[r14] + rol r9d,16 + mov rdi,QWORD PTR[64+r14] 
+ rol r12d,16 + mov rbp,QWORD PTR[128+r14] + rol r8d,16 + mov r10,QWORD PTR[192+r14] + xor eax,r9d + rol r11d,16 + xor ecx,r12d + mov r13,QWORD PTR[256+r14] + xor ebx,r8d + xor edx,r11d + jmp $L$dec_loop_compact +ALIGN 16 +$L$dec_compact_done:: + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] +DB 0f3h,0c3h +_x86_64_AES_decrypt_compact ENDP +ALIGN 16 +PUBLIC asm_AES_decrypt + + +asm_AES_decrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_asm_AES_decrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + + mov r10,rsp + lea rcx,QWORD PTR[((-63))+rdx] + and rsp,-64 + sub rcx,rsp + neg rcx + and rcx,03c0h + sub rsp,rcx + sub rsp,32 + + mov QWORD PTR[16+rsp],rsi + mov QWORD PTR[24+rsp],r10 +$L$dec_prologue:: + + mov r15,rdx + mov r13d,DWORD PTR[240+r15] + + mov eax,DWORD PTR[rdi] + mov ebx,DWORD PTR[4+rdi] + mov ecx,DWORD PTR[8+rdi] + mov edx,DWORD PTR[12+rdi] + + shl r13d,4 + lea rbp,QWORD PTR[r13*1+r15] + mov QWORD PTR[rsp],r15 + mov QWORD PTR[8+rsp],rbp + + + lea r14,QWORD PTR[(($L$AES_Td+2048))] + lea rbp,QWORD PTR[768+rsp] + sub rbp,r14 + and rbp,0300h + lea r14,QWORD PTR[rbp*1+r14] + shr rbp,3 + add r14,rbp + + call _x86_64_AES_decrypt_compact + + mov r9,QWORD PTR[16+rsp] + mov rsi,QWORD PTR[24+rsp] + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$dec_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_asm_AES_decrypt:: +asm_AES_decrypt ENDP +ALIGN 16 +PUBLIC asm_AES_set_encrypt_key + +asm_AES_set_encrypt_key PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + 
mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_asm_AES_set_encrypt_key:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + sub rsp,8 +$L$enc_key_prologue:: + + call _x86_64_AES_set_encrypt_key + + mov rbp,QWORD PTR[40+rsp] + mov rbx,QWORD PTR[48+rsp] + add rsp,56 +$L$enc_key_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_asm_AES_set_encrypt_key:: +asm_AES_set_encrypt_key ENDP + + +ALIGN 16 +_x86_64_AES_set_encrypt_key PROC PRIVATE + mov ecx,esi + mov rsi,rdi + mov rdi,rdx + + test rsi,-1 + jz $L$badpointer + test rdi,-1 + jz $L$badpointer + + lea rbp,QWORD PTR[$L$AES_Te] + lea rbp,QWORD PTR[((2048+128))+rbp] + + + mov eax,DWORD PTR[((0-128))+rbp] + mov ebx,DWORD PTR[((32-128))+rbp] + mov r8d,DWORD PTR[((64-128))+rbp] + mov edx,DWORD PTR[((96-128))+rbp] + mov eax,DWORD PTR[((128-128))+rbp] + mov ebx,DWORD PTR[((160-128))+rbp] + mov r8d,DWORD PTR[((192-128))+rbp] + mov edx,DWORD PTR[((224-128))+rbp] + + cmp ecx,128 + je $L$10rounds + cmp ecx,192 + je $L$12rounds + cmp ecx,256 + je $L$14rounds + mov rax,-2 + jmp $L$exit + +$L$10rounds:: + mov rax,QWORD PTR[rsi] + mov rdx,QWORD PTR[8+rsi] + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rdx + + shr rdx,32 + xor ecx,ecx + jmp $L$10shortcut +ALIGN 4 +$L$10loop:: + mov eax,DWORD PTR[rdi] + mov edx,DWORD PTR[12+rdi] +$L$10shortcut:: + movzx esi,dl + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,24 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shr edx,16 + movzx esi,dl + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,8 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shl ebx,16 + xor eax,ebx + + xor eax,DWORD PTR[((1024-128))+rcx*4+rbp] + mov DWORD PTR[16+rdi],eax + xor eax,DWORD PTR[4+rdi] + mov DWORD PTR[20+rdi],eax + xor eax,DWORD PTR[8+rdi] + mov DWORD PTR[24+rdi],eax + xor eax,DWORD PTR[12+rdi] + mov DWORD 
PTR[28+rdi],eax + add ecx,1 + lea rdi,QWORD PTR[16+rdi] + cmp ecx,10 + jl $L$10loop + + mov DWORD PTR[80+rdi],10 + xor rax,rax + jmp $L$exit + +$L$12rounds:: + mov rax,QWORD PTR[rsi] + mov rbx,QWORD PTR[8+rsi] + mov rdx,QWORD PTR[16+rsi] + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rbx + mov QWORD PTR[16+rdi],rdx + + shr rdx,32 + xor ecx,ecx + jmp $L$12shortcut +ALIGN 4 +$L$12loop:: + mov eax,DWORD PTR[rdi] + mov edx,DWORD PTR[20+rdi] +$L$12shortcut:: + movzx esi,dl + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,24 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shr edx,16 + movzx esi,dl + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,8 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shl ebx,16 + xor eax,ebx + + xor eax,DWORD PTR[((1024-128))+rcx*4+rbp] + mov DWORD PTR[24+rdi],eax + xor eax,DWORD PTR[4+rdi] + mov DWORD PTR[28+rdi],eax + xor eax,DWORD PTR[8+rdi] + mov DWORD PTR[32+rdi],eax + xor eax,DWORD PTR[12+rdi] + mov DWORD PTR[36+rdi],eax + + cmp ecx,7 + je $L$12break + add ecx,1 + + xor eax,DWORD PTR[16+rdi] + mov DWORD PTR[40+rdi],eax + xor eax,DWORD PTR[20+rdi] + mov DWORD PTR[44+rdi],eax + + lea rdi,QWORD PTR[24+rdi] + jmp $L$12loop +$L$12break:: + mov DWORD PTR[72+rdi],12 + xor rax,rax + jmp $L$exit + +$L$14rounds:: + mov rax,QWORD PTR[rsi] + mov rbx,QWORD PTR[8+rsi] + mov rcx,QWORD PTR[16+rsi] + mov rdx,QWORD PTR[24+rsi] + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rbx + mov QWORD PTR[16+rdi],rcx + mov QWORD PTR[24+rdi],rdx + + shr rdx,32 + xor ecx,ecx + jmp $L$14shortcut +ALIGN 4 +$L$14loop:: + mov eax,DWORD PTR[rdi] + mov edx,DWORD PTR[28+rdi] +$L$14shortcut:: + movzx esi,dl + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,24 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shr edx,16 + movzx esi,dl + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,8 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shl 
ebx,16 + xor eax,ebx + + xor eax,DWORD PTR[((1024-128))+rcx*4+rbp] + mov DWORD PTR[32+rdi],eax + xor eax,DWORD PTR[4+rdi] + mov DWORD PTR[36+rdi],eax + xor eax,DWORD PTR[8+rdi] + mov DWORD PTR[40+rdi],eax + xor eax,DWORD PTR[12+rdi] + mov DWORD PTR[44+rdi],eax + + cmp ecx,6 + je $L$14break + add ecx,1 + + mov edx,eax + mov eax,DWORD PTR[16+rdi] + movzx esi,dl + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shr edx,16 + shl ebx,8 + movzx esi,dl + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,16 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shl ebx,24 + xor eax,ebx + + mov DWORD PTR[48+rdi],eax + xor eax,DWORD PTR[20+rdi] + mov DWORD PTR[52+rdi],eax + xor eax,DWORD PTR[24+rdi] + mov DWORD PTR[56+rdi],eax + xor eax,DWORD PTR[28+rdi] + mov DWORD PTR[60+rdi],eax + + lea rdi,QWORD PTR[32+rdi] + jmp $L$14loop +$L$14break:: + mov DWORD PTR[48+rdi],14 + xor rax,rax + jmp $L$exit + +$L$badpointer:: + mov rax,-1 +$L$exit:: +DB 0f3h,0c3h +_x86_64_AES_set_encrypt_key ENDP +ALIGN 16 +PUBLIC asm_AES_set_decrypt_key + +asm_AES_set_decrypt_key PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_asm_AES_set_decrypt_key:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + push rdx +$L$dec_key_prologue:: + + call _x86_64_AES_set_encrypt_key + mov r8,QWORD PTR[rsp] + cmp eax,0 + jne $L$abort + + mov r14d,DWORD PTR[240+r8] + xor rdi,rdi + lea rcx,QWORD PTR[r14*4+rdi] + mov rsi,r8 + lea rdi,QWORD PTR[rcx*4+r8] +ALIGN 4 +$L$invert:: + mov rax,QWORD PTR[rsi] + mov rbx,QWORD PTR[8+rsi] + mov rcx,QWORD PTR[rdi] + mov rdx,QWORD PTR[8+rdi] + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rbx + mov QWORD PTR[rsi],rcx + mov QWORD PTR[8+rsi],rdx + lea rsi,QWORD PTR[16+rsi] + lea rdi,QWORD PTR[((-16))+rdi] + cmp rdi,rsi + jne $L$invert + + lea rax,QWORD 
PTR[(($L$AES_Te+2048+1024))] + + mov rsi,QWORD PTR[40+rax] + mov rdi,QWORD PTR[48+rax] + mov rbp,QWORD PTR[56+rax] + + mov r15,r8 + sub r14d,1 +ALIGN 4 +$L$permute:: + lea r15,QWORD PTR[16+r15] + mov rax,QWORD PTR[r15] + mov rcx,QWORD PTR[8+r15] + mov r9,rsi + mov r12,rsi + and r9,rax + and r12,rcx + mov rbx,r9 + mov rdx,r12 + shr r9,7 + lea r8,QWORD PTR[rax*1+rax] + shr r12,7 + lea r11,QWORD PTR[rcx*1+rcx] + sub rbx,r9 + sub rdx,r12 + and r8,rdi + and r11,rdi + and rbx,rbp + and rdx,rbp + xor r8,rbx + xor r11,rdx + mov r10,rsi + mov r13,rsi + + and r10,r8 + and r13,r11 + mov rbx,r10 + mov rdx,r13 + shr r10,7 + lea r9,QWORD PTR[r8*1+r8] + shr r13,7 + lea r12,QWORD PTR[r11*1+r11] + sub rbx,r10 + sub rdx,r13 + and r9,rdi + and r12,rdi + and rbx,rbp + and rdx,rbp + xor r9,rbx + xor r12,rdx + mov r10,rsi + mov r13,rsi + + and r10,r9 + and r13,r12 + mov rbx,r10 + mov rdx,r13 + shr r10,7 + xor r8,rax + shr r13,7 + xor r11,rcx + sub rbx,r10 + sub rdx,r13 + lea r10,QWORD PTR[r9*1+r9] + lea r13,QWORD PTR[r12*1+r12] + xor r9,rax + xor r12,rcx + and r10,rdi + and r13,rdi + and rbx,rbp + and rdx,rbp + xor r10,rbx + xor r13,rdx + + xor rax,r10 + xor rcx,r13 + xor r8,r10 + xor r11,r13 + mov rbx,rax + mov rdx,rcx + xor r9,r10 + shr rbx,32 + xor r12,r13 + shr rdx,32 + xor r10,r8 + rol eax,8 + xor r13,r11 + rol ecx,8 + xor r10,r9 + rol ebx,8 + xor r13,r12 + + rol edx,8 + xor eax,r10d + shr r10,32 + xor ecx,r13d + shr r13,32 + xor ebx,r10d + xor edx,r13d + + mov r10,r8 + rol r8d,24 + mov r13,r11 + rol r11d,24 + shr r10,32 + xor eax,r8d + shr r13,32 + xor ecx,r11d + rol r10d,24 + mov r8,r9 + rol r13d,24 + mov r11,r12 + shr r8,32 + xor ebx,r10d + shr r11,32 + xor edx,r13d + + + rol r9d,16 + + rol r12d,16 + + rol r8d,16 + + xor eax,r9d + rol r11d,16 + xor ecx,r12d + + xor ebx,r8d + xor edx,r11d + mov DWORD PTR[r15],eax + mov DWORD PTR[4+r15],ebx + mov DWORD PTR[8+r15],ecx + mov DWORD PTR[12+r15],edx + sub r14d,1 + jnz $L$permute + + xor rax,rax +$L$abort:: + mov r15,QWORD PTR[8+rsp] + 
mov r14,QWORD PTR[16+rsp] + mov r13,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov rbp,QWORD PTR[40+rsp] + mov rbx,QWORD PTR[48+rsp] + add rsp,56 +$L$dec_key_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_asm_AES_set_decrypt_key:: +asm_AES_set_decrypt_key ENDP +ALIGN 16 +PUBLIC asm_AES_cbc_encrypt + +EXTERN OPENSSL_ia32cap_P:NEAR + +asm_AES_cbc_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_asm_AES_cbc_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + cmp rdx,0 + je $L$cbc_epilogue + pushfq + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 +$L$cbc_prologue:: + + cld + mov r9d,r9d + + lea r14,QWORD PTR[$L$AES_Te] + cmp r9,0 + jne $L$cbc_picked_te + lea r14,QWORD PTR[$L$AES_Td] +$L$cbc_picked_te:: + + mov r10d,DWORD PTR[OPENSSL_ia32cap_P] + cmp rdx,512 + jb $L$cbc_slow_prologue + test rdx,15 + jnz $L$cbc_slow_prologue + bt r10d,28 + jc $L$cbc_slow_prologue + + + lea r15,QWORD PTR[((-88-248))+rsp] + and r15,-64 + + + mov r10,r14 + lea r11,QWORD PTR[2304+r14] + mov r12,r15 + and r10,0FFFh + and r11,0FFFh + and r12,0FFFh + + cmp r12,r11 + jb $L$cbc_te_break_out + sub r12,r11 + sub r15,r12 + jmp $L$cbc_te_ok +$L$cbc_te_break_out:: + sub r12,r10 + and r12,0FFFh + add r12,320 + sub r15,r12 +ALIGN 4 +$L$cbc_te_ok:: + + xchg r15,rsp + + mov QWORD PTR[16+rsp],r15 +$L$cbc_fast_body:: + mov QWORD PTR[24+rsp],rdi + mov QWORD PTR[32+rsp],rsi + mov QWORD PTR[40+rsp],rdx + mov QWORD PTR[48+rsp],rcx + mov QWORD PTR[56+rsp],r8 + mov DWORD PTR[((80+240))+rsp],0 + mov rbp,r8 + mov rbx,r9 + mov r9,rsi + mov r8,rdi + mov r15,rcx + + mov eax,DWORD PTR[240+r15] + + mov r10,r15 + sub r10,r14 + and r10,0fffh + cmp r10,2304 + jb $L$cbc_do_ecopy + cmp r10,4096-248 + jb $L$cbc_skip_ecopy +ALIGN 4 +$L$cbc_do_ecopy:: + mov rsi,r15 + lea rdi,QWORD PTR[80+rsp] + lea 
r15,QWORD PTR[80+rsp] + mov ecx,240/8 + DD 090A548F3h + mov DWORD PTR[rdi],eax +$L$cbc_skip_ecopy:: + mov QWORD PTR[rsp],r15 + + mov ecx,18 +ALIGN 4 +$L$cbc_prefetch_te:: + mov r10,QWORD PTR[r14] + mov r11,QWORD PTR[32+r14] + mov r12,QWORD PTR[64+r14] + mov r13,QWORD PTR[96+r14] + lea r14,QWORD PTR[128+r14] + sub ecx,1 + jnz $L$cbc_prefetch_te + lea r14,QWORD PTR[((-2304))+r14] + + cmp rbx,0 + je $L$FAST_DECRYPT + + + mov eax,DWORD PTR[rbp] + mov ebx,DWORD PTR[4+rbp] + mov ecx,DWORD PTR[8+rbp] + mov edx,DWORD PTR[12+rbp] + +ALIGN 4 +$L$cbc_fast_enc_loop:: + xor eax,DWORD PTR[r8] + xor ebx,DWORD PTR[4+r8] + xor ecx,DWORD PTR[8+r8] + xor edx,DWORD PTR[12+r8] + mov r15,QWORD PTR[rsp] + mov QWORD PTR[24+rsp],r8 + + call _x86_64_AES_encrypt + + mov r8,QWORD PTR[24+rsp] + mov r10,QWORD PTR[40+rsp] + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + lea r8,QWORD PTR[16+r8] + lea r9,QWORD PTR[16+r9] + sub r10,16 + test r10,-16 + mov QWORD PTR[40+rsp],r10 + jnz $L$cbc_fast_enc_loop + mov rbp,QWORD PTR[56+rsp] + mov DWORD PTR[rbp],eax + mov DWORD PTR[4+rbp],ebx + mov DWORD PTR[8+rbp],ecx + mov DWORD PTR[12+rbp],edx + + jmp $L$cbc_fast_cleanup + + +ALIGN 16 +$L$FAST_DECRYPT:: + cmp r9,r8 + je $L$cbc_fast_dec_in_place + + mov QWORD PTR[64+rsp],rbp +ALIGN 4 +$L$cbc_fast_dec_loop:: + mov eax,DWORD PTR[r8] + mov ebx,DWORD PTR[4+r8] + mov ecx,DWORD PTR[8+r8] + mov edx,DWORD PTR[12+r8] + mov r15,QWORD PTR[rsp] + mov QWORD PTR[24+rsp],r8 + + call _x86_64_AES_decrypt + + mov rbp,QWORD PTR[64+rsp] + mov r8,QWORD PTR[24+rsp] + mov r10,QWORD PTR[40+rsp] + xor eax,DWORD PTR[rbp] + xor ebx,DWORD PTR[4+rbp] + xor ecx,DWORD PTR[8+rbp] + xor edx,DWORD PTR[12+rbp] + mov rbp,r8 + + sub r10,16 + mov QWORD PTR[40+rsp],r10 + mov QWORD PTR[64+rsp],rbp + + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + lea r8,QWORD PTR[16+r8] + lea r9,QWORD PTR[16+r9] + jnz $L$cbc_fast_dec_loop + mov 
r12,QWORD PTR[56+rsp] + mov r10,QWORD PTR[rbp] + mov r11,QWORD PTR[8+rbp] + mov QWORD PTR[r12],r10 + mov QWORD PTR[8+r12],r11 + jmp $L$cbc_fast_cleanup + +ALIGN 16 +$L$cbc_fast_dec_in_place:: + mov r10,QWORD PTR[rbp] + mov r11,QWORD PTR[8+rbp] + mov QWORD PTR[((0+64))+rsp],r10 + mov QWORD PTR[((8+64))+rsp],r11 +ALIGN 4 +$L$cbc_fast_dec_in_place_loop:: + mov eax,DWORD PTR[r8] + mov ebx,DWORD PTR[4+r8] + mov ecx,DWORD PTR[8+r8] + mov edx,DWORD PTR[12+r8] + mov r15,QWORD PTR[rsp] + mov QWORD PTR[24+rsp],r8 + + call _x86_64_AES_decrypt + + mov r8,QWORD PTR[24+rsp] + mov r10,QWORD PTR[40+rsp] + xor eax,DWORD PTR[((0+64))+rsp] + xor ebx,DWORD PTR[((4+64))+rsp] + xor ecx,DWORD PTR[((8+64))+rsp] + xor edx,DWORD PTR[((12+64))+rsp] + + mov r11,QWORD PTR[r8] + mov r12,QWORD PTR[8+r8] + sub r10,16 + jz $L$cbc_fast_dec_in_place_done + + mov QWORD PTR[((0+64))+rsp],r11 + mov QWORD PTR[((8+64))+rsp],r12 + + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + lea r8,QWORD PTR[16+r8] + lea r9,QWORD PTR[16+r9] + mov QWORD PTR[40+rsp],r10 + jmp $L$cbc_fast_dec_in_place_loop +$L$cbc_fast_dec_in_place_done:: + mov rdi,QWORD PTR[56+rsp] + mov QWORD PTR[rdi],r11 + mov QWORD PTR[8+rdi],r12 + + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + +ALIGN 4 +$L$cbc_fast_cleanup:: + cmp DWORD PTR[((80+240))+rsp],0 + lea rdi,QWORD PTR[80+rsp] + je $L$cbc_exit + mov ecx,240/8 + xor rax,rax + DD 090AB48F3h + + jmp $L$cbc_exit + + +ALIGN 16 +$L$cbc_slow_prologue:: + + lea rbp,QWORD PTR[((-88))+rsp] + and rbp,-64 + + lea r10,QWORD PTR[((-88-63))+rcx] + sub r10,rbp + neg r10 + and r10,03c0h + sub rbp,r10 + + xchg rbp,rsp + + mov QWORD PTR[16+rsp],rbp +$L$cbc_slow_body:: + + + + + mov QWORD PTR[56+rsp],r8 + mov rbp,r8 + mov rbx,r9 + mov r9,rsi + mov r8,rdi + mov r15,rcx + mov r10,rdx + + mov eax,DWORD PTR[240+r15] + mov QWORD PTR[rsp],r15 + shl eax,4 + lea rax,QWORD PTR[rax*1+r15] + mov QWORD 
PTR[8+rsp],rax + + + lea r14,QWORD PTR[2048+r14] + lea rax,QWORD PTR[((768-8))+rsp] + sub rax,r14 + and rax,0300h + lea r14,QWORD PTR[rax*1+r14] + + cmp rbx,0 + je $L$SLOW_DECRYPT + + + test r10,-16 + mov eax,DWORD PTR[rbp] + mov ebx,DWORD PTR[4+rbp] + mov ecx,DWORD PTR[8+rbp] + mov edx,DWORD PTR[12+rbp] + jz $L$cbc_slow_enc_tail + +ALIGN 4 +$L$cbc_slow_enc_loop:: + xor eax,DWORD PTR[r8] + xor ebx,DWORD PTR[4+r8] + xor ecx,DWORD PTR[8+r8] + xor edx,DWORD PTR[12+r8] + mov r15,QWORD PTR[rsp] + mov QWORD PTR[24+rsp],r8 + mov QWORD PTR[32+rsp],r9 + mov QWORD PTR[40+rsp],r10 + + call _x86_64_AES_encrypt_compact + + mov r8,QWORD PTR[24+rsp] + mov r9,QWORD PTR[32+rsp] + mov r10,QWORD PTR[40+rsp] + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + lea r8,QWORD PTR[16+r8] + lea r9,QWORD PTR[16+r9] + sub r10,16 + test r10,-16 + jnz $L$cbc_slow_enc_loop + test r10,15 + jnz $L$cbc_slow_enc_tail + mov rbp,QWORD PTR[56+rsp] + mov DWORD PTR[rbp],eax + mov DWORD PTR[4+rbp],ebx + mov DWORD PTR[8+rbp],ecx + mov DWORD PTR[12+rbp],edx + + jmp $L$cbc_exit + +ALIGN 4 +$L$cbc_slow_enc_tail:: + mov r11,rax + mov r12,rcx + mov rcx,r10 + mov rsi,r8 + mov rdi,r9 + DD 09066A4F3h + mov rcx,16 + sub rcx,r10 + xor rax,rax + DD 09066AAF3h + mov r8,r9 + mov r10,16 + mov rax,r11 + mov rcx,r12 + jmp $L$cbc_slow_enc_loop + +ALIGN 16 +$L$SLOW_DECRYPT:: + shr rax,3 + add r14,rax + + mov r11,QWORD PTR[rbp] + mov r12,QWORD PTR[8+rbp] + mov QWORD PTR[((0+64))+rsp],r11 + mov QWORD PTR[((8+64))+rsp],r12 + +ALIGN 4 +$L$cbc_slow_dec_loop:: + mov eax,DWORD PTR[r8] + mov ebx,DWORD PTR[4+r8] + mov ecx,DWORD PTR[8+r8] + mov edx,DWORD PTR[12+r8] + mov r15,QWORD PTR[rsp] + mov QWORD PTR[24+rsp],r8 + mov QWORD PTR[32+rsp],r9 + mov QWORD PTR[40+rsp],r10 + + call _x86_64_AES_decrypt_compact + + mov r8,QWORD PTR[24+rsp] + mov r9,QWORD PTR[32+rsp] + mov r10,QWORD PTR[40+rsp] + xor eax,DWORD PTR[((0+64))+rsp] + xor ebx,DWORD PTR[((4+64))+rsp] + xor ecx,DWORD 
PTR[((8+64))+rsp] + xor edx,DWORD PTR[((12+64))+rsp] + + mov r11,QWORD PTR[r8] + mov r12,QWORD PTR[8+r8] + sub r10,16 + jc $L$cbc_slow_dec_partial + jz $L$cbc_slow_dec_done + + mov QWORD PTR[((0+64))+rsp],r11 + mov QWORD PTR[((8+64))+rsp],r12 + + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + lea r8,QWORD PTR[16+r8] + lea r9,QWORD PTR[16+r9] + jmp $L$cbc_slow_dec_loop +$L$cbc_slow_dec_done:: + mov rdi,QWORD PTR[56+rsp] + mov QWORD PTR[rdi],r11 + mov QWORD PTR[8+rdi],r12 + + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + jmp $L$cbc_exit + +ALIGN 4 +$L$cbc_slow_dec_partial:: + mov rdi,QWORD PTR[56+rsp] + mov QWORD PTR[rdi],r11 + mov QWORD PTR[8+rdi],r12 + + mov DWORD PTR[((0+64))+rsp],eax + mov DWORD PTR[((4+64))+rsp],ebx + mov DWORD PTR[((8+64))+rsp],ecx + mov DWORD PTR[((12+64))+rsp],edx + + mov rdi,r9 + lea rsi,QWORD PTR[64+rsp] + lea rcx,QWORD PTR[16+r10] + DD 09066A4F3h + jmp $L$cbc_exit + +ALIGN 16 +$L$cbc_exit:: + mov rsi,QWORD PTR[16+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$cbc_popfq:: + popfq +$L$cbc_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_asm_AES_cbc_encrypt:: +asm_AES_cbc_encrypt ENDP +ALIGN 64 +$L$AES_Te:: + DD 0a56363c6h,0a56363c6h + DD 0847c7cf8h,0847c7cf8h + DD 0997777eeh,0997777eeh + DD 08d7b7bf6h,08d7b7bf6h + DD 00df2f2ffh,00df2f2ffh + DD 0bd6b6bd6h,0bd6b6bd6h + DD 0b16f6fdeh,0b16f6fdeh + DD 054c5c591h,054c5c591h + DD 050303060h,050303060h + DD 003010102h,003010102h + DD 0a96767ceh,0a96767ceh + DD 07d2b2b56h,07d2b2b56h + DD 019fefee7h,019fefee7h + DD 062d7d7b5h,062d7d7b5h + DD 0e6abab4dh,0e6abab4dh + DD 09a7676ech,09a7676ech + DD 045caca8fh,045caca8fh + DD 09d82821fh,09d82821fh + DD 040c9c989h,040c9c989h 
+ DD 0877d7dfah,0877d7dfah + DD 015fafaefh,015fafaefh + DD 0eb5959b2h,0eb5959b2h + DD 0c947478eh,0c947478eh + DD 00bf0f0fbh,00bf0f0fbh + DD 0ecadad41h,0ecadad41h + DD 067d4d4b3h,067d4d4b3h + DD 0fda2a25fh,0fda2a25fh + DD 0eaafaf45h,0eaafaf45h + DD 0bf9c9c23h,0bf9c9c23h + DD 0f7a4a453h,0f7a4a453h + DD 0967272e4h,0967272e4h + DD 05bc0c09bh,05bc0c09bh + DD 0c2b7b775h,0c2b7b775h + DD 01cfdfde1h,01cfdfde1h + DD 0ae93933dh,0ae93933dh + DD 06a26264ch,06a26264ch + DD 05a36366ch,05a36366ch + DD 0413f3f7eh,0413f3f7eh + DD 002f7f7f5h,002f7f7f5h + DD 04fcccc83h,04fcccc83h + DD 05c343468h,05c343468h + DD 0f4a5a551h,0f4a5a551h + DD 034e5e5d1h,034e5e5d1h + DD 008f1f1f9h,008f1f1f9h + DD 0937171e2h,0937171e2h + DD 073d8d8abh,073d8d8abh + DD 053313162h,053313162h + DD 03f15152ah,03f15152ah + DD 00c040408h,00c040408h + DD 052c7c795h,052c7c795h + DD 065232346h,065232346h + DD 05ec3c39dh,05ec3c39dh + DD 028181830h,028181830h + DD 0a1969637h,0a1969637h + DD 00f05050ah,00f05050ah + DD 0b59a9a2fh,0b59a9a2fh + DD 00907070eh,00907070eh + DD 036121224h,036121224h + DD 09b80801bh,09b80801bh + DD 03de2e2dfh,03de2e2dfh + DD 026ebebcdh,026ebebcdh + DD 06927274eh,06927274eh + DD 0cdb2b27fh,0cdb2b27fh + DD 09f7575eah,09f7575eah + DD 01b090912h,01b090912h + DD 09e83831dh,09e83831dh + DD 0742c2c58h,0742c2c58h + DD 02e1a1a34h,02e1a1a34h + DD 02d1b1b36h,02d1b1b36h + DD 0b26e6edch,0b26e6edch + DD 0ee5a5ab4h,0ee5a5ab4h + DD 0fba0a05bh,0fba0a05bh + DD 0f65252a4h,0f65252a4h + DD 04d3b3b76h,04d3b3b76h + DD 061d6d6b7h,061d6d6b7h + DD 0ceb3b37dh,0ceb3b37dh + DD 07b292952h,07b292952h + DD 03ee3e3ddh,03ee3e3ddh + DD 0712f2f5eh,0712f2f5eh + DD 097848413h,097848413h + DD 0f55353a6h,0f55353a6h + DD 068d1d1b9h,068d1d1b9h + DD 000000000h,000000000h + DD 02cededc1h,02cededc1h + DD 060202040h,060202040h + DD 01ffcfce3h,01ffcfce3h + DD 0c8b1b179h,0c8b1b179h + DD 0ed5b5bb6h,0ed5b5bb6h + DD 0be6a6ad4h,0be6a6ad4h + DD 046cbcb8dh,046cbcb8dh + DD 0d9bebe67h,0d9bebe67h + DD 04b393972h,04b393972h + DD 0de4a4a94h,0de4a4a94h + 
DD 0d44c4c98h,0d44c4c98h + DD 0e85858b0h,0e85858b0h + DD 04acfcf85h,04acfcf85h + DD 06bd0d0bbh,06bd0d0bbh + DD 02aefefc5h,02aefefc5h + DD 0e5aaaa4fh,0e5aaaa4fh + DD 016fbfbedh,016fbfbedh + DD 0c5434386h,0c5434386h + DD 0d74d4d9ah,0d74d4d9ah + DD 055333366h,055333366h + DD 094858511h,094858511h + DD 0cf45458ah,0cf45458ah + DD 010f9f9e9h,010f9f9e9h + DD 006020204h,006020204h + DD 0817f7ffeh,0817f7ffeh + DD 0f05050a0h,0f05050a0h + DD 0443c3c78h,0443c3c78h + DD 0ba9f9f25h,0ba9f9f25h + DD 0e3a8a84bh,0e3a8a84bh + DD 0f35151a2h,0f35151a2h + DD 0fea3a35dh,0fea3a35dh + DD 0c0404080h,0c0404080h + DD 08a8f8f05h,08a8f8f05h + DD 0ad92923fh,0ad92923fh + DD 0bc9d9d21h,0bc9d9d21h + DD 048383870h,048383870h + DD 004f5f5f1h,004f5f5f1h + DD 0dfbcbc63h,0dfbcbc63h + DD 0c1b6b677h,0c1b6b677h + DD 075dadaafh,075dadaafh + DD 063212142h,063212142h + DD 030101020h,030101020h + DD 01affffe5h,01affffe5h + DD 00ef3f3fdh,00ef3f3fdh + DD 06dd2d2bfh,06dd2d2bfh + DD 04ccdcd81h,04ccdcd81h + DD 0140c0c18h,0140c0c18h + DD 035131326h,035131326h + DD 02fececc3h,02fececc3h + DD 0e15f5fbeh,0e15f5fbeh + DD 0a2979735h,0a2979735h + DD 0cc444488h,0cc444488h + DD 03917172eh,03917172eh + DD 057c4c493h,057c4c493h + DD 0f2a7a755h,0f2a7a755h + DD 0827e7efch,0827e7efch + DD 0473d3d7ah,0473d3d7ah + DD 0ac6464c8h,0ac6464c8h + DD 0e75d5dbah,0e75d5dbah + DD 02b191932h,02b191932h + DD 0957373e6h,0957373e6h + DD 0a06060c0h,0a06060c0h + DD 098818119h,098818119h + DD 0d14f4f9eh,0d14f4f9eh + DD 07fdcdca3h,07fdcdca3h + DD 066222244h,066222244h + DD 07e2a2a54h,07e2a2a54h + DD 0ab90903bh,0ab90903bh + DD 08388880bh,08388880bh + DD 0ca46468ch,0ca46468ch + DD 029eeeec7h,029eeeec7h + DD 0d3b8b86bh,0d3b8b86bh + DD 03c141428h,03c141428h + DD 079dedea7h,079dedea7h + DD 0e25e5ebch,0e25e5ebch + DD 01d0b0b16h,01d0b0b16h + DD 076dbdbadh,076dbdbadh + DD 03be0e0dbh,03be0e0dbh + DD 056323264h,056323264h + DD 04e3a3a74h,04e3a3a74h + DD 01e0a0a14h,01e0a0a14h + DD 0db494992h,0db494992h + DD 00a06060ch,00a06060ch + DD 06c242448h,06c242448h + 
DD 0e45c5cb8h,0e45c5cb8h + DD 05dc2c29fh,05dc2c29fh + DD 06ed3d3bdh,06ed3d3bdh + DD 0efacac43h,0efacac43h + DD 0a66262c4h,0a66262c4h + DD 0a8919139h,0a8919139h + DD 0a4959531h,0a4959531h + DD 037e4e4d3h,037e4e4d3h + DD 08b7979f2h,08b7979f2h + DD 032e7e7d5h,032e7e7d5h + DD 043c8c88bh,043c8c88bh + DD 05937376eh,05937376eh + DD 0b76d6ddah,0b76d6ddah + DD 08c8d8d01h,08c8d8d01h + DD 064d5d5b1h,064d5d5b1h + DD 0d24e4e9ch,0d24e4e9ch + DD 0e0a9a949h,0e0a9a949h + DD 0b46c6cd8h,0b46c6cd8h + DD 0fa5656ach,0fa5656ach + DD 007f4f4f3h,007f4f4f3h + DD 025eaeacfh,025eaeacfh + DD 0af6565cah,0af6565cah + DD 08e7a7af4h,08e7a7af4h + DD 0e9aeae47h,0e9aeae47h + DD 018080810h,018080810h + DD 0d5baba6fh,0d5baba6fh + DD 0887878f0h,0887878f0h + DD 06f25254ah,06f25254ah + DD 0722e2e5ch,0722e2e5ch + DD 0241c1c38h,0241c1c38h + DD 0f1a6a657h,0f1a6a657h + DD 0c7b4b473h,0c7b4b473h + DD 051c6c697h,051c6c697h + DD 023e8e8cbh,023e8e8cbh + DD 07cdddda1h,07cdddda1h + DD 09c7474e8h,09c7474e8h + DD 0211f1f3eh,0211f1f3eh + DD 0dd4b4b96h,0dd4b4b96h + DD 0dcbdbd61h,0dcbdbd61h + DD 0868b8b0dh,0868b8b0dh + DD 0858a8a0fh,0858a8a0fh + DD 0907070e0h,0907070e0h + DD 0423e3e7ch,0423e3e7ch + DD 0c4b5b571h,0c4b5b571h + DD 0aa6666cch,0aa6666cch + DD 0d8484890h,0d8484890h + DD 005030306h,005030306h + DD 001f6f6f7h,001f6f6f7h + DD 0120e0e1ch,0120e0e1ch + DD 0a36161c2h,0a36161c2h + DD 05f35356ah,05f35356ah + DD 0f95757aeh,0f95757aeh + DD 0d0b9b969h,0d0b9b969h + DD 091868617h,091868617h + DD 058c1c199h,058c1c199h + DD 0271d1d3ah,0271d1d3ah + DD 0b99e9e27h,0b99e9e27h + DD 038e1e1d9h,038e1e1d9h + DD 013f8f8ebh,013f8f8ebh + DD 0b398982bh,0b398982bh + DD 033111122h,033111122h + DD 0bb6969d2h,0bb6969d2h + DD 070d9d9a9h,070d9d9a9h + DD 0898e8e07h,0898e8e07h + DD 0a7949433h,0a7949433h + DD 0b69b9b2dh,0b69b9b2dh + DD 0221e1e3ch,0221e1e3ch + DD 092878715h,092878715h + DD 020e9e9c9h,020e9e9c9h + DD 049cece87h,049cece87h + DD 0ff5555aah,0ff5555aah + DD 078282850h,078282850h + DD 07adfdfa5h,07adfdfa5h + DD 08f8c8c03h,08f8c8c03h + 
DD 0f8a1a159h,0f8a1a159h + DD 080898909h,080898909h + DD 0170d0d1ah,0170d0d1ah + DD 0dabfbf65h,0dabfbf65h + DD 031e6e6d7h,031e6e6d7h + DD 0c6424284h,0c6424284h + DD 0b86868d0h,0b86868d0h + DD 0c3414182h,0c3414182h + DD 0b0999929h,0b0999929h + DD 0772d2d5ah,0772d2d5ah + DD 0110f0f1eh,0110f0f1eh + DD 0cbb0b07bh,0cbb0b07bh + DD 0fc5454a8h,0fc5454a8h + DD 0d6bbbb6dh,0d6bbbb6dh + DD 03a16162ch,03a16162ch +DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h +DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h +DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h +DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h +DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch +DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h +DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah +DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h +DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h +DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h +DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh +DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh +DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h +DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h +DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h +DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h +DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h +DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h +DB 060h,081h,04fh,0dch,022h,02ah,090h,088h +DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh +DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch +DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h +DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h +DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h +DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h +DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah +DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh +DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh +DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h +DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh +DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h +DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h +DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h +DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h +DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h +DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h +DB 
0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch +DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h +DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah +DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h +DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h +DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h +DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh +DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh +DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h +DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h +DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h +DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h +DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h +DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h +DB 060h,081h,04fh,0dch,022h,02ah,090h,088h +DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh +DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch +DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h +DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h +DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h +DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h +DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah +DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh +DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh +DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h +DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh +DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h +DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h +DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h +DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h +DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h +DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h +DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch +DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h +DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah +DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h +DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h +DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h +DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh +DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh +DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h +DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h +DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h +DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h +DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h +DB 
0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h +DB 060h,081h,04fh,0dch,022h,02ah,090h,088h +DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh +DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch +DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h +DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h +DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h +DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h +DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah +DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh +DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh +DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h +DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh +DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h +DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h +DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h +DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h +DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h +DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h +DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch +DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h +DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah +DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h +DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h +DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h +DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh +DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh +DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h +DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h +DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h +DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h +DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h +DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h +DB 060h,081h,04fh,0dch,022h,02ah,090h,088h +DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh +DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch +DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h +DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h +DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h +DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h +DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah +DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh +DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh +DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h +DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh +DB 
08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h +DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h + DD 000000001h,000000002h,000000004h,000000008h + DD 000000010h,000000020h,000000040h,000000080h + DD 00000001bh,000000036h,080808080h,080808080h + DD 0fefefefeh,0fefefefeh,01b1b1b1bh,01b1b1b1bh +ALIGN 64 +$L$AES_Td:: + DD 050a7f451h,050a7f451h + DD 05365417eh,05365417eh + DD 0c3a4171ah,0c3a4171ah + DD 0965e273ah,0965e273ah + DD 0cb6bab3bh,0cb6bab3bh + DD 0f1459d1fh,0f1459d1fh + DD 0ab58faach,0ab58faach + DD 09303e34bh,09303e34bh + DD 055fa3020h,055fa3020h + DD 0f66d76adh,0f66d76adh + DD 09176cc88h,09176cc88h + DD 0254c02f5h,0254c02f5h + DD 0fcd7e54fh,0fcd7e54fh + DD 0d7cb2ac5h,0d7cb2ac5h + DD 080443526h,080443526h + DD 08fa362b5h,08fa362b5h + DD 0495ab1deh,0495ab1deh + DD 0671bba25h,0671bba25h + DD 0980eea45h,0980eea45h + DD 0e1c0fe5dh,0e1c0fe5dh + DD 002752fc3h,002752fc3h + DD 012f04c81h,012f04c81h + DD 0a397468dh,0a397468dh + DD 0c6f9d36bh,0c6f9d36bh + DD 0e75f8f03h,0e75f8f03h + DD 0959c9215h,0959c9215h + DD 0eb7a6dbfh,0eb7a6dbfh + DD 0da595295h,0da595295h + DD 02d83bed4h,02d83bed4h + DD 0d3217458h,0d3217458h + DD 02969e049h,02969e049h + DD 044c8c98eh,044c8c98eh + DD 06a89c275h,06a89c275h + DD 078798ef4h,078798ef4h + DD 06b3e5899h,06b3e5899h + DD 0dd71b927h,0dd71b927h + DD 0b64fe1beh,0b64fe1beh + DD 017ad88f0h,017ad88f0h + DD 066ac20c9h,066ac20c9h + DD 0b43ace7dh,0b43ace7dh + DD 0184adf63h,0184adf63h + DD 082311ae5h,082311ae5h + DD 060335197h,060335197h + DD 0457f5362h,0457f5362h + DD 0e07764b1h,0e07764b1h + DD 084ae6bbbh,084ae6bbbh + DD 01ca081feh,01ca081feh + DD 0942b08f9h,0942b08f9h + DD 058684870h,058684870h + DD 019fd458fh,019fd458fh + DD 0876cde94h,0876cde94h + DD 0b7f87b52h,0b7f87b52h + DD 023d373abh,023d373abh + DD 0e2024b72h,0e2024b72h + DD 0578f1fe3h,0578f1fe3h + DD 02aab5566h,02aab5566h + DD 00728ebb2h,00728ebb2h + DD 003c2b52fh,003c2b52fh + DD 09a7bc586h,09a7bc586h + DD 0a50837d3h,0a50837d3h + DD 0f2872830h,0f2872830h + DD 0b2a5bf23h,0b2a5bf23h + DD 
0ba6a0302h,0ba6a0302h + DD 05c8216edh,05c8216edh + DD 02b1ccf8ah,02b1ccf8ah + DD 092b479a7h,092b479a7h + DD 0f0f207f3h,0f0f207f3h + DD 0a1e2694eh,0a1e2694eh + DD 0cdf4da65h,0cdf4da65h + DD 0d5be0506h,0d5be0506h + DD 01f6234d1h,01f6234d1h + DD 08afea6c4h,08afea6c4h + DD 09d532e34h,09d532e34h + DD 0a055f3a2h,0a055f3a2h + DD 032e18a05h,032e18a05h + DD 075ebf6a4h,075ebf6a4h + DD 039ec830bh,039ec830bh + DD 0aaef6040h,0aaef6040h + DD 0069f715eh,0069f715eh + DD 051106ebdh,051106ebdh + DD 0f98a213eh,0f98a213eh + DD 03d06dd96h,03d06dd96h + DD 0ae053eddh,0ae053eddh + DD 046bde64dh,046bde64dh + DD 0b58d5491h,0b58d5491h + DD 0055dc471h,0055dc471h + DD 06fd40604h,06fd40604h + DD 0ff155060h,0ff155060h + DD 024fb9819h,024fb9819h + DD 097e9bdd6h,097e9bdd6h + DD 0cc434089h,0cc434089h + DD 0779ed967h,0779ed967h + DD 0bd42e8b0h,0bd42e8b0h + DD 0888b8907h,0888b8907h + DD 0385b19e7h,0385b19e7h + DD 0dbeec879h,0dbeec879h + DD 0470a7ca1h,0470a7ca1h + DD 0e90f427ch,0e90f427ch + DD 0c91e84f8h,0c91e84f8h + DD 000000000h,000000000h + DD 083868009h,083868009h + DD 048ed2b32h,048ed2b32h + DD 0ac70111eh,0ac70111eh + DD 04e725a6ch,04e725a6ch + DD 0fbff0efdh,0fbff0efdh + DD 05638850fh,05638850fh + DD 01ed5ae3dh,01ed5ae3dh + DD 027392d36h,027392d36h + DD 064d90f0ah,064d90f0ah + DD 021a65c68h,021a65c68h + DD 0d1545b9bh,0d1545b9bh + DD 03a2e3624h,03a2e3624h + DD 0b1670a0ch,0b1670a0ch + DD 00fe75793h,00fe75793h + DD 0d296eeb4h,0d296eeb4h + DD 09e919b1bh,09e919b1bh + DD 04fc5c080h,04fc5c080h + DD 0a220dc61h,0a220dc61h + DD 0694b775ah,0694b775ah + DD 0161a121ch,0161a121ch + DD 00aba93e2h,00aba93e2h + DD 0e52aa0c0h,0e52aa0c0h + DD 043e0223ch,043e0223ch + DD 01d171b12h,01d171b12h + DD 00b0d090eh,00b0d090eh + DD 0adc78bf2h,0adc78bf2h + DD 0b9a8b62dh,0b9a8b62dh + DD 0c8a91e14h,0c8a91e14h + DD 08519f157h,08519f157h + DD 04c0775afh,04c0775afh + DD 0bbdd99eeh,0bbdd99eeh + DD 0fd607fa3h,0fd607fa3h + DD 09f2601f7h,09f2601f7h + DD 0bcf5725ch,0bcf5725ch + DD 0c53b6644h,0c53b6644h + DD 0347efb5bh,0347efb5bh + DD 
07629438bh,07629438bh + DD 0dcc623cbh,0dcc623cbh + DD 068fcedb6h,068fcedb6h + DD 063f1e4b8h,063f1e4b8h + DD 0cadc31d7h,0cadc31d7h + DD 010856342h,010856342h + DD 040229713h,040229713h + DD 02011c684h,02011c684h + DD 07d244a85h,07d244a85h + DD 0f83dbbd2h,0f83dbbd2h + DD 01132f9aeh,01132f9aeh + DD 06da129c7h,06da129c7h + DD 04b2f9e1dh,04b2f9e1dh + DD 0f330b2dch,0f330b2dch + DD 0ec52860dh,0ec52860dh + DD 0d0e3c177h,0d0e3c177h + DD 06c16b32bh,06c16b32bh + DD 099b970a9h,099b970a9h + DD 0fa489411h,0fa489411h + DD 02264e947h,02264e947h + DD 0c48cfca8h,0c48cfca8h + DD 01a3ff0a0h,01a3ff0a0h + DD 0d82c7d56h,0d82c7d56h + DD 0ef903322h,0ef903322h + DD 0c74e4987h,0c74e4987h + DD 0c1d138d9h,0c1d138d9h + DD 0fea2ca8ch,0fea2ca8ch + DD 0360bd498h,0360bd498h + DD 0cf81f5a6h,0cf81f5a6h + DD 028de7aa5h,028de7aa5h + DD 0268eb7dah,0268eb7dah + DD 0a4bfad3fh,0a4bfad3fh + DD 0e49d3a2ch,0e49d3a2ch + DD 00d927850h,00d927850h + DD 09bcc5f6ah,09bcc5f6ah + DD 062467e54h,062467e54h + DD 0c2138df6h,0c2138df6h + DD 0e8b8d890h,0e8b8d890h + DD 05ef7392eh,05ef7392eh + DD 0f5afc382h,0f5afc382h + DD 0be805d9fh,0be805d9fh + DD 07c93d069h,07c93d069h + DD 0a92dd56fh,0a92dd56fh + DD 0b31225cfh,0b31225cfh + DD 03b99acc8h,03b99acc8h + DD 0a77d1810h,0a77d1810h + DD 06e639ce8h,06e639ce8h + DD 07bbb3bdbh,07bbb3bdbh + DD 0097826cdh,0097826cdh + DD 0f418596eh,0f418596eh + DD 001b79aech,001b79aech + DD 0a89a4f83h,0a89a4f83h + DD 0656e95e6h,0656e95e6h + DD 07ee6ffaah,07ee6ffaah + DD 008cfbc21h,008cfbc21h + DD 0e6e815efh,0e6e815efh + DD 0d99be7bah,0d99be7bah + DD 0ce366f4ah,0ce366f4ah + DD 0d4099feah,0d4099feah + DD 0d67cb029h,0d67cb029h + DD 0afb2a431h,0afb2a431h + DD 031233f2ah,031233f2ah + DD 03094a5c6h,03094a5c6h + DD 0c066a235h,0c066a235h + DD 037bc4e74h,037bc4e74h + DD 0a6ca82fch,0a6ca82fch + DD 0b0d090e0h,0b0d090e0h + DD 015d8a733h,015d8a733h + DD 04a9804f1h,04a9804f1h + DD 0f7daec41h,0f7daec41h + DD 00e50cd7fh,00e50cd7fh + DD 02ff69117h,02ff69117h + DD 08dd64d76h,08dd64d76h + DD 04db0ef43h,04db0ef43h + DD 
0544daacch,0544daacch + DD 0df0496e4h,0df0496e4h + DD 0e3b5d19eh,0e3b5d19eh + DD 01b886a4ch,01b886a4ch + DD 0b81f2cc1h,0b81f2cc1h + DD 07f516546h,07f516546h + DD 004ea5e9dh,004ea5e9dh + DD 05d358c01h,05d358c01h + DD 0737487fah,0737487fah + DD 02e410bfbh,02e410bfbh + DD 05a1d67b3h,05a1d67b3h + DD 052d2db92h,052d2db92h + DD 0335610e9h,0335610e9h + DD 01347d66dh,01347d66dh + DD 08c61d79ah,08c61d79ah + DD 07a0ca137h,07a0ca137h + DD 08e14f859h,08e14f859h + DD 0893c13ebh,0893c13ebh + DD 0ee27a9ceh,0ee27a9ceh + DD 035c961b7h,035c961b7h + DD 0ede51ce1h,0ede51ce1h + DD 03cb1477ah,03cb1477ah + DD 059dfd29ch,059dfd29ch + DD 03f73f255h,03f73f255h + DD 079ce1418h,079ce1418h + DD 0bf37c773h,0bf37c773h + DD 0eacdf753h,0eacdf753h + DD 05baafd5fh,05baafd5fh + DD 0146f3ddfh,0146f3ddfh + DD 086db4478h,086db4478h + DD 081f3afcah,081f3afcah + DD 03ec468b9h,03ec468b9h + DD 02c342438h,02c342438h + DD 05f40a3c2h,05f40a3c2h + DD 072c31d16h,072c31d16h + DD 00c25e2bch,00c25e2bch + DD 08b493c28h,08b493c28h + DD 041950dffh,041950dffh + DD 07101a839h,07101a839h + DD 0deb30c08h,0deb30c08h + DD 09ce4b4d8h,09ce4b4d8h + DD 090c15664h,090c15664h + DD 06184cb7bh,06184cb7bh + DD 070b632d5h,070b632d5h + DD 0745c6c48h,0745c6c48h + DD 04257b8d0h,04257b8d0h +DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h +DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh +DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h +DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh +DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh +DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh +DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h +DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h +DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h +DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h +DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah +DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h +DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah +DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h +DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h +DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh +DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah +DB 
097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h +DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h +DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh +DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h +DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh +DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h +DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h +DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h +DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh +DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh +DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh +DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h +DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h +DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h +DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh + DD 080808080h,080808080h,0fefefefeh,0fefefefeh + DD 01b1b1b1bh,01b1b1b1bh,0,0 +DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h +DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh +DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h +DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh +DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh +DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh +DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h +DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h +DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h +DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h +DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah +DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h +DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah +DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h +DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h +DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh +DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah +DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h +DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h +DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh +DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h +DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh +DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h +DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h +DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h +DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh +DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh +DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh +DB 
0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h +DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h +DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h +DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh + DD 080808080h,080808080h,0fefefefeh,0fefefefeh + DD 01b1b1b1bh,01b1b1b1bh,0,0 +DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h +DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh +DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h +DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh +DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh +DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh +DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h +DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h +DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h +DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h +DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah +DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h +DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah +DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h +DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h +DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh +DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah +DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h +DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h +DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh +DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h +DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh +DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h +DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h +DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h +DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh +DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh +DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh +DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h +DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h +DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h +DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh + DD 080808080h,080808080h,0fefefefeh,0fefefefeh + DD 01b1b1b1bh,01b1b1b1bh,0,0 +DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h +DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh +DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h +DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh +DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh +DB 
0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh +DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h +DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h +DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h +DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h +DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah +DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h +DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah +DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h +DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h +DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh +DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah +DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h +DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h +DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh +DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h +DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh +DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h +DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h +DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h +DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh +DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh +DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh +DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h +DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h +DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h +DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh + DD 080808080h,080808080h,0fefefefeh,0fefefefeh + DD 01b1b1b1bh,01b1b1b1bh,0,0 +DB 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32 +DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +DB 62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +block_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_block_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_block_prologue + + mov 
rax,QWORD PTR[24+rax] + lea rax,QWORD PTR[48+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_block_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + jmp $L$common_seh_exit +block_se_handler ENDP + + +ALIGN 16 +key_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_key_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_key_prologue + + lea rax,QWORD PTR[56+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_key_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + jmp $L$common_seh_exit +key_se_handler ENDP + + +ALIGN 16 +cbc_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$cbc_prologue] + cmp rbx,r10 + 
jb $L$in_cbc_prologue + + lea r10,QWORD PTR[$L$cbc_fast_body] + cmp rbx,r10 + jb $L$in_cbc_frame_setup + + lea r10,QWORD PTR[$L$cbc_slow_prologue] + cmp rbx,r10 + jb $L$in_cbc_body + + lea r10,QWORD PTR[$L$cbc_slow_body] + cmp rbx,r10 + jb $L$in_cbc_frame_setup + +$L$in_cbc_body:: + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$cbc_epilogue] + cmp rbx,r10 + jae $L$in_cbc_prologue + + lea rax,QWORD PTR[8+rax] + + lea r10,QWORD PTR[$L$cbc_popfq] + cmp rbx,r10 + jae $L$in_cbc_prologue + + mov rax,QWORD PTR[8+rax] + lea rax,QWORD PTR[56+rax] + +$L$in_cbc_frame_setup:: + mov rbx,QWORD PTR[((-16))+rax] + mov rbp,QWORD PTR[((-24))+rax] + mov r12,QWORD PTR[((-32))+rax] + mov r13,QWORD PTR[((-40))+rax] + mov r14,QWORD PTR[((-48))+rax] + mov r15,QWORD PTR[((-56))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_cbc_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + +$L$common_seh_exit:: + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +cbc_se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_asm_AES_encrypt + DD imagerel $L$SEH_end_asm_AES_encrypt + DD imagerel $L$SEH_info_asm_AES_encrypt + + DD imagerel $L$SEH_begin_asm_AES_decrypt + DD imagerel $L$SEH_end_asm_AES_decrypt + DD imagerel 
$L$SEH_info_asm_AES_decrypt + + DD imagerel $L$SEH_begin_asm_AES_set_encrypt_key + DD imagerel $L$SEH_end_asm_AES_set_encrypt_key + DD imagerel $L$SEH_info_asm_AES_set_encrypt_key + + DD imagerel $L$SEH_begin_asm_AES_set_decrypt_key + DD imagerel $L$SEH_end_asm_AES_set_decrypt_key + DD imagerel $L$SEH_info_asm_AES_set_decrypt_key + + DD imagerel $L$SEH_begin_asm_AES_cbc_encrypt + DD imagerel $L$SEH_end_asm_AES_cbc_encrypt + DD imagerel $L$SEH_info_asm_AES_cbc_encrypt + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_asm_AES_encrypt:: +DB 9,0,0,0 + DD imagerel block_se_handler + DD imagerel $L$enc_prologue,imagerel $L$enc_epilogue +$L$SEH_info_asm_AES_decrypt:: +DB 9,0,0,0 + DD imagerel block_se_handler + DD imagerel $L$dec_prologue,imagerel $L$dec_epilogue +$L$SEH_info_asm_AES_set_encrypt_key:: +DB 9,0,0,0 + DD imagerel key_se_handler + DD imagerel $L$enc_key_prologue,imagerel $L$enc_key_epilogue +$L$SEH_info_asm_AES_set_decrypt_key:: +DB 9,0,0,0 + DD imagerel key_se_handler + DD imagerel $L$dec_key_prologue,imagerel $L$dec_key_epilogue +$L$SEH_info_asm_AES_cbc_encrypt:: +DB 9,0,0,0 + DD imagerel cbc_se_handler + +.xdata ENDS +END diff --git a/win-x86_64/crypto/aes/aesni-x86_64.asm b/win-x86_64/crypto/aes/aesni-x86_64.asm new file mode 100644 index 0000000..53d8afc --- /dev/null +++ b/win-x86_64/crypto/aes/aesni-x86_64.asm @@ -0,0 +1,3631 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR +PUBLIC aesni_encrypt + +ALIGN 16 +aesni_encrypt PROC PUBLIC + movups xmm2,XMMWORD PTR[rcx] + mov eax,DWORD PTR[240+r8] + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[16+r8] + lea r8,QWORD PTR[32+r8] + xorps xmm2,xmm0 +$L$oop_enc1_1:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + jnz $L$oop_enc1_1 +DB 102,15,56,221,209 + movups XMMWORD PTR[rdx],xmm2 + DB 0F3h,0C3h ;repret +aesni_encrypt ENDP + +PUBLIC aesni_decrypt + +ALIGN 16 +aesni_decrypt PROC PUBLIC + movups xmm2,XMMWORD 
PTR[rcx] + mov eax,DWORD PTR[240+r8] + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[16+r8] + lea r8,QWORD PTR[32+r8] + xorps xmm2,xmm0 +$L$oop_dec1_2:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + jnz $L$oop_dec1_2 +DB 102,15,56,223,209 + movups XMMWORD PTR[rdx],xmm2 + DB 0F3h,0C3h ;repret +aesni_decrypt ENDP + +ALIGN 16 +_aesni_encrypt2 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax + add rax,16 + +$L$enc_loop2:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$enc_loop2 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,221,208 +DB 102,15,56,221,216 + DB 0F3h,0C3h ;repret +_aesni_encrypt2 ENDP + +ALIGN 16 +_aesni_decrypt2 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax + add rax,16 + +$L$dec_loop2:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$dec_loop2 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,223,208 +DB 102,15,56,223,216 + DB 0F3h,0C3h ;repret +_aesni_decrypt2 ENDP + +ALIGN 16 +_aesni_encrypt3 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax + add rax,16 + +$L$enc_loop3:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 
102,15,56,220,216 +DB 102,15,56,220,224 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$enc_loop3 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 + DB 0F3h,0C3h ;repret +_aesni_encrypt3 ENDP + +ALIGN 16 +_aesni_decrypt3 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax + add rax,16 + +$L$dec_loop3:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$dec_loop3 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 + DB 0F3h,0C3h ;repret +_aesni_decrypt3 ENDP + +ALIGN 16 +_aesni_encrypt4 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + xorps xmm5,xmm0 + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax +DB 00fh,01fh,000h + add rax,16 + +$L$enc_loop4:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$enc_loop4 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 +DB 102,15,56,221,232 + DB 0F3h,0C3h ;repret +_aesni_encrypt4 ENDP + +ALIGN 16 +_aesni_decrypt4 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + 
xorps xmm5,xmm0 + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax +DB 00fh,01fh,000h + add rax,16 + +$L$dec_loop4:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$dec_loop4 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 +DB 102,15,56,223,232 + DB 0F3h,0C3h ;repret +_aesni_decrypt4 ENDP + +ALIGN 16 +_aesni_encrypt6 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 +DB 102,15,56,220,209 + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax +DB 102,15,56,220,217 + pxor xmm5,xmm0 + pxor xmm6,xmm0 +DB 102,15,56,220,225 + pxor xmm7,xmm0 + add rax,16 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jmp $L$enc_loop6_enter +ALIGN 16 +$L$enc_loop6:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +$L$enc_loop6_enter:: + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$enc_loop6 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 +DB 102,15,56,221,232 +DB 102,15,56,221,240 +DB 102,15,56,221,248 + DB 0F3h,0C3h ;repret +_aesni_encrypt6 ENDP + +ALIGN 16 +_aesni_decrypt6 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + 
xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 +DB 102,15,56,222,209 + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax +DB 102,15,56,222,217 + pxor xmm5,xmm0 + pxor xmm6,xmm0 +DB 102,15,56,222,225 + pxor xmm7,xmm0 + add rax,16 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jmp $L$dec_loop6_enter +ALIGN 16 +$L$dec_loop6:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +$L$dec_loop6_enter:: + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$dec_loop6 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 +DB 102,15,56,223,232 +DB 102,15,56,223,240 +DB 102,15,56,223,248 + DB 0F3h,0C3h ;repret +_aesni_decrypt6 ENDP + +ALIGN 16 +_aesni_encrypt8 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + pxor xmm4,xmm0 + pxor xmm5,xmm0 + pxor xmm6,xmm0 + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax +DB 102,15,56,220,209 + add rax,16 + pxor xmm7,xmm0 +DB 102,15,56,220,217 + pxor xmm8,xmm0 + pxor xmm9,xmm0 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jmp $L$enc_loop8_enter +ALIGN 16 +$L$enc_loop8:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 +$L$enc_loop8_enter:: + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 
102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$enc_loop8 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 +DB 102,15,56,221,232 +DB 102,15,56,221,240 +DB 102,15,56,221,248 +DB 102,68,15,56,221,192 +DB 102,68,15,56,221,200 + DB 0F3h,0C3h ;repret +_aesni_encrypt8 ENDP + +ALIGN 16 +_aesni_decrypt8 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + pxor xmm4,xmm0 + pxor xmm5,xmm0 + pxor xmm6,xmm0 + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax +DB 102,15,56,222,209 + add rax,16 + pxor xmm7,xmm0 +DB 102,15,56,222,217 + pxor xmm8,xmm0 + pxor xmm9,xmm0 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jmp $L$dec_loop8_enter +ALIGN 16 +$L$dec_loop8:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 +$L$dec_loop8_enter:: + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$dec_loop8 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 +DB 102,15,56,223,232 +DB 102,15,56,223,240 +DB 
102,15,56,223,248 +DB 102,68,15,56,223,192 +DB 102,68,15,56,223,200 + DB 0F3h,0C3h ;repret +_aesni_decrypt8 ENDP +PUBLIC aesni_ecb_encrypt + +ALIGN 16 +aesni_ecb_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ecb_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + lea rsp,QWORD PTR[((-88))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 +$L$ecb_enc_body:: + and rdx,-16 + jz $L$ecb_ret + + mov eax,DWORD PTR[240+rcx] + movups xmm0,XMMWORD PTR[rcx] + mov r11,rcx + mov r10d,eax + test r8d,r8d + jz $L$ecb_decrypt + + cmp rdx,080h + jb $L$ecb_enc_tail + + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqu xmm4,XMMWORD PTR[32+rdi] + movdqu xmm5,XMMWORD PTR[48+rdi] + movdqu xmm6,XMMWORD PTR[64+rdi] + movdqu xmm7,XMMWORD PTR[80+rdi] + movdqu xmm8,XMMWORD PTR[96+rdi] + movdqu xmm9,XMMWORD PTR[112+rdi] + lea rdi,QWORD PTR[128+rdi] + sub rdx,080h + jmp $L$ecb_enc_loop8_enter +ALIGN 16 +$L$ecb_enc_loop8:: + movups XMMWORD PTR[rsi],xmm2 + mov rcx,r11 + movdqu xmm2,XMMWORD PTR[rdi] + mov eax,r10d + movups XMMWORD PTR[16+rsi],xmm3 + movdqu xmm3,XMMWORD PTR[16+rdi] + movups XMMWORD PTR[32+rsi],xmm4 + movdqu xmm4,XMMWORD PTR[32+rdi] + movups XMMWORD PTR[48+rsi],xmm5 + movdqu xmm5,XMMWORD PTR[48+rdi] + movups XMMWORD PTR[64+rsi],xmm6 + movdqu xmm6,XMMWORD PTR[64+rdi] + movups XMMWORD PTR[80+rsi],xmm7 + movdqu xmm7,XMMWORD PTR[80+rdi] + movups XMMWORD PTR[96+rsi],xmm8 + movdqu xmm8,XMMWORD PTR[96+rdi] + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + movdqu xmm9,XMMWORD PTR[112+rdi] + lea rdi,QWORD PTR[128+rdi] +$L$ecb_enc_loop8_enter:: + + call _aesni_encrypt8 + + sub rdx,080h + jnc $L$ecb_enc_loop8 + + movups XMMWORD PTR[rsi],xmm2 + mov rcx,r11 + movups XMMWORD PTR[16+rsi],xmm3 + mov eax,r10d + movups XMMWORD PTR[32+rsi],xmm4 + movups 
XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + movups XMMWORD PTR[96+rsi],xmm8 + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + add rdx,080h + jz $L$ecb_ret + +$L$ecb_enc_tail:: + movups xmm2,XMMWORD PTR[rdi] + cmp rdx,020h + jb $L$ecb_enc_one + movups xmm3,XMMWORD PTR[16+rdi] + je $L$ecb_enc_two + movups xmm4,XMMWORD PTR[32+rdi] + cmp rdx,040h + jb $L$ecb_enc_three + movups xmm5,XMMWORD PTR[48+rdi] + je $L$ecb_enc_four + movups xmm6,XMMWORD PTR[64+rdi] + cmp rdx,060h + jb $L$ecb_enc_five + movups xmm7,XMMWORD PTR[80+rdi] + je $L$ecb_enc_six + movdqu xmm8,XMMWORD PTR[96+rdi] + call _aesni_encrypt8 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + movups XMMWORD PTR[96+rsi],xmm8 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_one:: + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_3:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_3 +DB 102,15,56,221,209 + movups XMMWORD PTR[rsi],xmm2 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_two:: + call _aesni_encrypt2 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_three:: + call _aesni_encrypt3 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_four:: + call _aesni_encrypt4 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_five:: + xorps xmm7,xmm7 + call _aesni_encrypt6 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 
+ jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_six:: + call _aesni_encrypt6 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + jmp $L$ecb_ret + +ALIGN 16 +$L$ecb_decrypt:: + cmp rdx,080h + jb $L$ecb_dec_tail + + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqu xmm4,XMMWORD PTR[32+rdi] + movdqu xmm5,XMMWORD PTR[48+rdi] + movdqu xmm6,XMMWORD PTR[64+rdi] + movdqu xmm7,XMMWORD PTR[80+rdi] + movdqu xmm8,XMMWORD PTR[96+rdi] + movdqu xmm9,XMMWORD PTR[112+rdi] + lea rdi,QWORD PTR[128+rdi] + sub rdx,080h + jmp $L$ecb_dec_loop8_enter +ALIGN 16 +$L$ecb_dec_loop8:: + movups XMMWORD PTR[rsi],xmm2 + mov rcx,r11 + movdqu xmm2,XMMWORD PTR[rdi] + mov eax,r10d + movups XMMWORD PTR[16+rsi],xmm3 + movdqu xmm3,XMMWORD PTR[16+rdi] + movups XMMWORD PTR[32+rsi],xmm4 + movdqu xmm4,XMMWORD PTR[32+rdi] + movups XMMWORD PTR[48+rsi],xmm5 + movdqu xmm5,XMMWORD PTR[48+rdi] + movups XMMWORD PTR[64+rsi],xmm6 + movdqu xmm6,XMMWORD PTR[64+rdi] + movups XMMWORD PTR[80+rsi],xmm7 + movdqu xmm7,XMMWORD PTR[80+rdi] + movups XMMWORD PTR[96+rsi],xmm8 + movdqu xmm8,XMMWORD PTR[96+rdi] + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + movdqu xmm9,XMMWORD PTR[112+rdi] + lea rdi,QWORD PTR[128+rdi] +$L$ecb_dec_loop8_enter:: + + call _aesni_decrypt8 + + movups xmm0,XMMWORD PTR[r11] + sub rdx,080h + jnc $L$ecb_dec_loop8 + + movups XMMWORD PTR[rsi],xmm2 + mov rcx,r11 + movups XMMWORD PTR[16+rsi],xmm3 + mov eax,r10d + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + movups XMMWORD PTR[96+rsi],xmm8 + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + add rdx,080h + jz $L$ecb_ret + +$L$ecb_dec_tail:: + movups xmm2,XMMWORD PTR[rdi] + cmp rdx,020h + jb $L$ecb_dec_one + movups xmm3,XMMWORD PTR[16+rdi] + je $L$ecb_dec_two + movups xmm4,XMMWORD 
PTR[32+rdi] + cmp rdx,040h + jb $L$ecb_dec_three + movups xmm5,XMMWORD PTR[48+rdi] + je $L$ecb_dec_four + movups xmm6,XMMWORD PTR[64+rdi] + cmp rdx,060h + jb $L$ecb_dec_five + movups xmm7,XMMWORD PTR[80+rdi] + je $L$ecb_dec_six + movups xmm8,XMMWORD PTR[96+rdi] + movups xmm0,XMMWORD PTR[rcx] + call _aesni_decrypt8 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + movups XMMWORD PTR[96+rsi],xmm8 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_one:: + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_4:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_4 +DB 102,15,56,223,209 + movups XMMWORD PTR[rsi],xmm2 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_two:: + call _aesni_decrypt2 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_three:: + call _aesni_decrypt3 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_four:: + call _aesni_decrypt4 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_five:: + xorps xmm7,xmm7 + call _aesni_decrypt6 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_six:: + call _aesni_decrypt6 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + +$L$ecb_ret:: + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + 
movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + lea rsp,QWORD PTR[88+rsp] +$L$ecb_enc_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ecb_encrypt:: +aesni_ecb_encrypt ENDP +PUBLIC aesni_ccm64_encrypt_blocks + +ALIGN 16 +aesni_ccm64_encrypt_blocks PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ccm64_encrypt_blocks:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + lea rsp,QWORD PTR[((-88))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 +$L$ccm64_enc_body:: + mov eax,DWORD PTR[240+rcx] + movdqu xmm6,XMMWORD PTR[r8] + movdqa xmm9,XMMWORD PTR[$L$increment64] + movdqa xmm7,XMMWORD PTR[$L$bswap_mask] + + shl eax,4 + mov r10d,16 + lea r11,QWORD PTR[rcx] + movdqu xmm3,XMMWORD PTR[r9] + movdqa xmm2,xmm6 + lea rcx,QWORD PTR[32+rax*1+rcx] +DB 102,15,56,0,247 + sub r10,rax + jmp $L$ccm64_enc_outer +ALIGN 16 +$L$ccm64_enc_outer:: + movups xmm0,XMMWORD PTR[r11] + mov rax,r10 + movups xmm8,XMMWORD PTR[rdi] + + xorps xmm2,xmm0 + movups xmm1,XMMWORD PTR[16+r11] + xorps xmm0,xmm8 + xorps xmm3,xmm0 + movups xmm0,XMMWORD PTR[32+r11] + +$L$ccm64_enc2_loop:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$ccm64_enc2_loop +DB 102,15,56,220,209 +DB 102,15,56,220,217 + paddq xmm6,xmm9 + dec rdx +DB 102,15,56,221,208 +DB 102,15,56,221,216 + + lea rdi,QWORD PTR[16+rdi] + xorps xmm8,xmm2 + movdqa xmm2,xmm6 + movups XMMWORD PTR[rsi],xmm8 +DB 102,15,56,0,215 + lea rsi,QWORD PTR[16+rsi] + jnz $L$ccm64_enc_outer + + movups XMMWORD PTR[r9],xmm3 + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + 
movaps xmm9,XMMWORD PTR[48+rsp] + lea rsp,QWORD PTR[88+rsp] +$L$ccm64_enc_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ccm64_encrypt_blocks:: +aesni_ccm64_encrypt_blocks ENDP +PUBLIC aesni_ccm64_decrypt_blocks + +ALIGN 16 +aesni_ccm64_decrypt_blocks PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ccm64_decrypt_blocks:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + lea rsp,QWORD PTR[((-88))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 +$L$ccm64_dec_body:: + mov eax,DWORD PTR[240+rcx] + movups xmm6,XMMWORD PTR[r8] + movdqu xmm3,XMMWORD PTR[r9] + movdqa xmm9,XMMWORD PTR[$L$increment64] + movdqa xmm7,XMMWORD PTR[$L$bswap_mask] + + movaps xmm2,xmm6 + mov r10d,eax + mov r11,rcx +DB 102,15,56,0,247 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_5:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_5 +DB 102,15,56,221,209 + shl r10d,4 + mov eax,16 + movups xmm8,XMMWORD PTR[rdi] + paddq xmm6,xmm9 + lea rdi,QWORD PTR[16+rdi] + sub rax,r10 + lea rcx,QWORD PTR[32+r10*1+r11] + mov r10,rax + jmp $L$ccm64_dec_outer +ALIGN 16 +$L$ccm64_dec_outer:: + xorps xmm8,xmm2 + movdqa xmm2,xmm6 + movups XMMWORD PTR[rsi],xmm8 + lea rsi,QWORD PTR[16+rsi] +DB 102,15,56,0,215 + + sub rdx,1 + jz $L$ccm64_dec_break + + movups xmm0,XMMWORD PTR[r11] + mov rax,r10 + movups xmm1,XMMWORD PTR[16+r11] + xorps xmm8,xmm0 + xorps xmm2,xmm0 + xorps xmm3,xmm8 + movups xmm0,XMMWORD PTR[32+r11] + jmp $L$ccm64_dec2_loop +ALIGN 16 +$L$ccm64_dec2_loop:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 
102,15,56,220,216 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$ccm64_dec2_loop + movups xmm8,XMMWORD PTR[rdi] + paddq xmm6,xmm9 +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,221,208 +DB 102,15,56,221,216 + lea rdi,QWORD PTR[16+rdi] + jmp $L$ccm64_dec_outer + +ALIGN 16 +$L$ccm64_dec_break:: + + mov eax,DWORD PTR[240+r11] + movups xmm0,XMMWORD PTR[r11] + movups xmm1,XMMWORD PTR[16+r11] + xorps xmm8,xmm0 + lea r11,QWORD PTR[32+r11] + xorps xmm3,xmm8 +$L$oop_enc1_6:: +DB 102,15,56,220,217 + dec eax + movups xmm1,XMMWORD PTR[r11] + lea r11,QWORD PTR[16+r11] + jnz $L$oop_enc1_6 +DB 102,15,56,221,217 + movups XMMWORD PTR[r9],xmm3 + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + lea rsp,QWORD PTR[88+rsp] +$L$ccm64_dec_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ccm64_decrypt_blocks:: +aesni_ccm64_decrypt_blocks ENDP +PUBLIC aesni_ctr32_encrypt_blocks + +ALIGN 16 +aesni_ctr32_encrypt_blocks PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ctr32_encrypt_blocks:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + lea rax,QWORD PTR[rsp] + push rbp + sub rsp,288 + and rsp,-16 + movaps XMMWORD PTR[(-168)+rax],xmm6 + movaps XMMWORD PTR[(-152)+rax],xmm7 + movaps XMMWORD PTR[(-136)+rax],xmm8 + movaps XMMWORD PTR[(-120)+rax],xmm9 + movaps XMMWORD PTR[(-104)+rax],xmm10 + movaps XMMWORD PTR[(-88)+rax],xmm11 + movaps XMMWORD PTR[(-72)+rax],xmm12 + movaps XMMWORD PTR[(-56)+rax],xmm13 + movaps XMMWORD PTR[(-40)+rax],xmm14 + movaps XMMWORD PTR[(-24)+rax],xmm15 +$L$ctr32_body:: + lea rbp,QWORD PTR[((-8))+rax] + + cmp rdx,1 + je $L$ctr32_one_shortcut + + movdqu xmm2,XMMWORD PTR[r8] + movdqu xmm0,XMMWORD PTR[rcx] + mov r8d,DWORD PTR[12+r8] + pxor xmm2,xmm0 + mov r11d,DWORD PTR[12+rcx] + movdqa XMMWORD 
PTR[rsp],xmm2 + bswap r8d + movdqa xmm3,xmm2 + movdqa xmm4,xmm2 + movdqa xmm5,xmm2 + movdqa XMMWORD PTR[64+rsp],xmm2 + movdqa XMMWORD PTR[80+rsp],xmm2 + movdqa XMMWORD PTR[96+rsp],xmm2 + mov r10,rdx + movdqa XMMWORD PTR[112+rsp],xmm2 + + lea rax,QWORD PTR[1+r8] + lea rdx,QWORD PTR[2+r8] + bswap eax + bswap edx + xor eax,r11d + xor edx,r11d +DB 102,15,58,34,216,3 + lea rax,QWORD PTR[3+r8] + movdqa XMMWORD PTR[16+rsp],xmm3 +DB 102,15,58,34,226,3 + bswap eax + mov rdx,r10 + lea r10,QWORD PTR[4+r8] + movdqa XMMWORD PTR[32+rsp],xmm4 + xor eax,r11d + bswap r10d +DB 102,15,58,34,232,3 + xor r10d,r11d + movdqa XMMWORD PTR[48+rsp],xmm5 + lea r9,QWORD PTR[5+r8] + mov DWORD PTR[((64+12))+rsp],r10d + bswap r9d + lea r10,QWORD PTR[6+r8] + mov eax,DWORD PTR[240+rcx] + xor r9d,r11d + bswap r10d + mov DWORD PTR[((80+12))+rsp],r9d + xor r10d,r11d + lea r9,QWORD PTR[7+r8] + mov DWORD PTR[((96+12))+rsp],r10d + bswap r9d + mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+4))] + xor r9d,r11d + and r10d,71303168 + mov DWORD PTR[((112+12))+rsp],r9d + + movups xmm1,XMMWORD PTR[16+rcx] + + movdqa xmm6,XMMWORD PTR[64+rsp] + movdqa xmm7,XMMWORD PTR[80+rsp] + + cmp rdx,8 + jb $L$ctr32_tail + + sub rdx,6 + cmp r10d,4194304 + je $L$ctr32_6x + + lea rcx,QWORD PTR[128+rcx] + sub rdx,2 + jmp $L$ctr32_loop8 + +ALIGN 16 +$L$ctr32_6x:: + shl eax,4 + mov r10d,48 + bswap r11d + lea rcx,QWORD PTR[32+rax*1+rcx] + sub r10,rax + jmp $L$ctr32_loop6 + +ALIGN 16 +$L$ctr32_loop6:: + add r8d,6 + movups xmm0,XMMWORD PTR[((-48))+r10*1+rcx] +DB 102,15,56,220,209 + mov eax,r8d + xor eax,r11d +DB 102,15,56,220,217 +DB 00fh,038h,0f1h,044h,024h,12 + lea eax,DWORD PTR[1+r8] +DB 102,15,56,220,225 + xor eax,r11d +DB 00fh,038h,0f1h,044h,024h,28 +DB 102,15,56,220,233 + lea eax,DWORD PTR[2+r8] + xor eax,r11d +DB 102,15,56,220,241 +DB 00fh,038h,0f1h,044h,024h,44 + lea eax,DWORD PTR[3+r8] +DB 102,15,56,220,249 + movups xmm1,XMMWORD PTR[((-32))+r10*1+rcx] + xor eax,r11d + +DB 102,15,56,220,208 +DB 00fh,038h,0f1h,044h,024h,60 + lea 
eax,DWORD PTR[4+r8] +DB 102,15,56,220,216 + xor eax,r11d +DB 00fh,038h,0f1h,044h,024h,76 +DB 102,15,56,220,224 + lea eax,DWORD PTR[5+r8] + xor eax,r11d +DB 102,15,56,220,232 +DB 00fh,038h,0f1h,044h,024h,92 + mov rax,r10 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[((-16))+r10*1+rcx] + + call $L$enc_loop6 + + movdqu xmm8,XMMWORD PTR[rdi] + movdqu xmm9,XMMWORD PTR[16+rdi] + movdqu xmm10,XMMWORD PTR[32+rdi] + movdqu xmm11,XMMWORD PTR[48+rdi] + movdqu xmm12,XMMWORD PTR[64+rdi] + movdqu xmm13,XMMWORD PTR[80+rdi] + lea rdi,QWORD PTR[96+rdi] + movups xmm1,XMMWORD PTR[((-64))+r10*1+rcx] + pxor xmm8,xmm2 + movaps xmm2,XMMWORD PTR[rsp] + pxor xmm9,xmm3 + movaps xmm3,XMMWORD PTR[16+rsp] + pxor xmm10,xmm4 + movaps xmm4,XMMWORD PTR[32+rsp] + pxor xmm11,xmm5 + movaps xmm5,XMMWORD PTR[48+rsp] + pxor xmm12,xmm6 + movaps xmm6,XMMWORD PTR[64+rsp] + pxor xmm13,xmm7 + movaps xmm7,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[rsi],xmm8 + movdqu XMMWORD PTR[16+rsi],xmm9 + movdqu XMMWORD PTR[32+rsi],xmm10 + movdqu XMMWORD PTR[48+rsi],xmm11 + movdqu XMMWORD PTR[64+rsi],xmm12 + movdqu XMMWORD PTR[80+rsi],xmm13 + lea rsi,QWORD PTR[96+rsi] + + sub rdx,6 + jnc $L$ctr32_loop6 + + add rdx,6 + jz $L$ctr32_done + + lea eax,DWORD PTR[((-48))+r10] + lea rcx,QWORD PTR[((-80))+r10*1+rcx] + neg eax + shr eax,4 + jmp $L$ctr32_tail + +ALIGN 32 +$L$ctr32_loop8:: + add r8d,8 + movdqa xmm8,XMMWORD PTR[96+rsp] +DB 102,15,56,220,209 + mov r9d,r8d + movdqa xmm9,XMMWORD PTR[112+rsp] +DB 102,15,56,220,217 + bswap r9d + movups xmm0,XMMWORD PTR[((32-128))+rcx] +DB 102,15,56,220,225 + xor r9d,r11d + nop +DB 102,15,56,220,233 + mov DWORD PTR[((0+12))+rsp],r9d + lea r9,QWORD PTR[1+r8] +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((48-128))+rcx] + bswap r9d +DB 102,15,56,220,208 +DB 102,15,56,220,216 + xor r9d,r11d +DB 066h,090h +DB 102,15,56,220,224 +DB 102,15,56,220,232 + mov DWORD PTR[((16+12))+rsp],r9d + lea r9,QWORD 
PTR[2+r8] +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((64-128))+rcx] + bswap r9d +DB 102,15,56,220,209 +DB 102,15,56,220,217 + xor r9d,r11d +DB 066h,090h +DB 102,15,56,220,225 +DB 102,15,56,220,233 + mov DWORD PTR[((32+12))+rsp],r9d + lea r9,QWORD PTR[3+r8] +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((80-128))+rcx] + bswap r9d +DB 102,15,56,220,208 +DB 102,15,56,220,216 + xor r9d,r11d +DB 066h,090h +DB 102,15,56,220,224 +DB 102,15,56,220,232 + mov DWORD PTR[((48+12))+rsp],r9d + lea r9,QWORD PTR[4+r8] +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((96-128))+rcx] + bswap r9d +DB 102,15,56,220,209 +DB 102,15,56,220,217 + xor r9d,r11d +DB 066h,090h +DB 102,15,56,220,225 +DB 102,15,56,220,233 + mov DWORD PTR[((64+12))+rsp],r9d + lea r9,QWORD PTR[5+r8] +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((112-128))+rcx] + bswap r9d +DB 102,15,56,220,208 +DB 102,15,56,220,216 + xor r9d,r11d +DB 066h,090h +DB 102,15,56,220,224 +DB 102,15,56,220,232 + mov DWORD PTR[((80+12))+rsp],r9d + lea r9,QWORD PTR[6+r8] +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((128-128))+rcx] + bswap r9d +DB 102,15,56,220,209 +DB 102,15,56,220,217 + xor r9d,r11d +DB 066h,090h +DB 102,15,56,220,225 +DB 102,15,56,220,233 + mov DWORD PTR[((96+12))+rsp],r9d + lea r9,QWORD PTR[7+r8] +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((144-128))+rcx] + bswap r9d +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 + xor r9d,r11d + movdqu xmm10,XMMWORD PTR[rdi] +DB 102,15,56,220,232 + mov DWORD PTR[((112+12))+rsp],r9d + cmp eax,11 +DB 102,15,56,220,240 +DB 
102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((160-128))+rcx] + + jb $L$ctr32_enc_done + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((176-128))+rcx] + +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((192-128))+rcx] + je $L$ctr32_enc_done + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((208-128))+rcx] + +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((224-128))+rcx] + jmp $L$ctr32_enc_done + +ALIGN 16 +$L$ctr32_enc_done:: + movdqu xmm11,XMMWORD PTR[16+rdi] + pxor xmm10,xmm0 + movdqu xmm12,XMMWORD PTR[32+rdi] + pxor xmm11,xmm0 + movdqu xmm13,XMMWORD PTR[48+rdi] + pxor xmm12,xmm0 + movdqu xmm14,XMMWORD PTR[64+rdi] + pxor xmm13,xmm0 + movdqu xmm15,XMMWORD PTR[80+rdi] + pxor xmm14,xmm0 + pxor xmm15,xmm0 +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movdqu xmm1,XMMWORD PTR[96+rdi] + lea rdi,QWORD PTR[128+rdi] + +DB 102,65,15,56,221,210 + pxor xmm1,xmm0 + movdqu xmm10,XMMWORD PTR[((112-128))+rdi] +DB 102,65,15,56,221,219 + pxor xmm10,xmm0 + movdqa xmm11,XMMWORD PTR[rsp] +DB 102,65,15,56,221,228 +DB 102,65,15,56,221,237 + movdqa xmm12,XMMWORD PTR[16+rsp] + movdqa xmm13,XMMWORD PTR[32+rsp] +DB 102,65,15,56,221,246 +DB 102,65,15,56,221,255 + movdqa 
xmm14,XMMWORD PTR[48+rsp] + movdqa xmm15,XMMWORD PTR[64+rsp] +DB 102,68,15,56,221,193 + movdqa xmm0,XMMWORD PTR[80+rsp] + movups xmm1,XMMWORD PTR[((16-128))+rcx] +DB 102,69,15,56,221,202 + + movups XMMWORD PTR[rsi],xmm2 + movdqa xmm2,xmm11 + movups XMMWORD PTR[16+rsi],xmm3 + movdqa xmm3,xmm12 + movups XMMWORD PTR[32+rsi],xmm4 + movdqa xmm4,xmm13 + movups XMMWORD PTR[48+rsi],xmm5 + movdqa xmm5,xmm14 + movups XMMWORD PTR[64+rsi],xmm6 + movdqa xmm6,xmm15 + movups XMMWORD PTR[80+rsi],xmm7 + movdqa xmm7,xmm0 + movups XMMWORD PTR[96+rsi],xmm8 + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + + sub rdx,8 + jnc $L$ctr32_loop8 + + add rdx,8 + jz $L$ctr32_done + lea rcx,QWORD PTR[((-128))+rcx] + +$L$ctr32_tail:: + lea rcx,QWORD PTR[16+rcx] + cmp rdx,4 + jb $L$ctr32_loop3 + je $L$ctr32_loop4 + + shl eax,4 + movdqa xmm8,XMMWORD PTR[96+rsp] + pxor xmm9,xmm9 + + movups xmm0,XMMWORD PTR[16+rcx] +DB 102,15,56,220,209 +DB 102,15,56,220,217 + lea rcx,QWORD PTR[((32-16))+rax*1+rcx] + neg rax +DB 102,15,56,220,225 + add rax,16 + movups xmm10,XMMWORD PTR[rdi] +DB 102,15,56,220,233 +DB 102,15,56,220,241 + movups xmm11,XMMWORD PTR[16+rdi] + movups xmm12,XMMWORD PTR[32+rdi] +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 + + call $L$enc_loop8_enter + + movdqu xmm13,XMMWORD PTR[48+rdi] + pxor xmm2,xmm10 + movdqu xmm10,XMMWORD PTR[64+rdi] + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm6,xmm10 + movdqu XMMWORD PTR[48+rsi],xmm5 + movdqu XMMWORD PTR[64+rsi],xmm6 + cmp rdx,6 + jb $L$ctr32_done + + movups xmm11,XMMWORD PTR[80+rdi] + xorps xmm7,xmm11 + movups XMMWORD PTR[80+rsi],xmm7 + je $L$ctr32_done + + movups xmm12,XMMWORD PTR[96+rdi] + xorps xmm8,xmm12 + movups XMMWORD PTR[96+rsi],xmm8 + jmp $L$ctr32_done + +ALIGN 32 +$L$ctr32_loop4:: +DB 102,15,56,220,209 + lea rcx,QWORD PTR[16+rcx] + dec eax +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + 
movups xmm1,XMMWORD PTR[rcx] + jnz $L$ctr32_loop4 +DB 102,15,56,221,209 +DB 102,15,56,221,217 + movups xmm10,XMMWORD PTR[rdi] + movups xmm11,XMMWORD PTR[16+rdi] +DB 102,15,56,221,225 +DB 102,15,56,221,233 + movups xmm12,XMMWORD PTR[32+rdi] + movups xmm13,XMMWORD PTR[48+rdi] + + xorps xmm2,xmm10 + movups XMMWORD PTR[rsi],xmm2 + xorps xmm3,xmm11 + movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[48+rsi],xmm5 + jmp $L$ctr32_done + +ALIGN 32 +$L$ctr32_loop3:: +DB 102,15,56,220,209 + lea rcx,QWORD PTR[16+rcx] + dec eax +DB 102,15,56,220,217 +DB 102,15,56,220,225 + movups xmm1,XMMWORD PTR[rcx] + jnz $L$ctr32_loop3 +DB 102,15,56,221,209 +DB 102,15,56,221,217 +DB 102,15,56,221,225 + + movups xmm10,XMMWORD PTR[rdi] + xorps xmm2,xmm10 + movups XMMWORD PTR[rsi],xmm2 + cmp rdx,2 + jb $L$ctr32_done + + movups xmm11,XMMWORD PTR[16+rdi] + xorps xmm3,xmm11 + movups XMMWORD PTR[16+rsi],xmm3 + je $L$ctr32_done + + movups xmm12,XMMWORD PTR[32+rdi] + xorps xmm4,xmm12 + movups XMMWORD PTR[32+rsi],xmm4 + jmp $L$ctr32_done + +ALIGN 16 +$L$ctr32_one_shortcut:: + movups xmm2,XMMWORD PTR[r8] + movups xmm10,XMMWORD PTR[rdi] + mov eax,DWORD PTR[240+rcx] + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_7:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_7 +DB 102,15,56,221,209 + xorps xmm2,xmm10 + movups XMMWORD PTR[rsi],xmm2 + jmp $L$ctr32_done + +ALIGN 16 +$L$ctr32_done:: + movaps xmm6,XMMWORD PTR[((-160))+rbp] + movaps xmm7,XMMWORD PTR[((-144))+rbp] + movaps xmm8,XMMWORD PTR[((-128))+rbp] + movaps xmm9,XMMWORD PTR[((-112))+rbp] + movaps xmm10,XMMWORD PTR[((-96))+rbp] + movaps xmm11,XMMWORD PTR[((-80))+rbp] + movaps xmm12,XMMWORD PTR[((-64))+rbp] + movaps xmm13,XMMWORD PTR[((-48))+rbp] + movaps xmm14,XMMWORD PTR[((-32))+rbp] + movaps xmm15,XMMWORD PTR[((-16))+rbp] + lea rsp,QWORD 
PTR[rbp] + pop rbp +$L$ctr32_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ctr32_encrypt_blocks:: +aesni_ctr32_encrypt_blocks ENDP +PUBLIC aesni_xts_encrypt + +ALIGN 16 +aesni_xts_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_xts_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + lea rax,QWORD PTR[rsp] + push rbp + sub rsp,272 + and rsp,-16 + movaps XMMWORD PTR[(-168)+rax],xmm6 + movaps XMMWORD PTR[(-152)+rax],xmm7 + movaps XMMWORD PTR[(-136)+rax],xmm8 + movaps XMMWORD PTR[(-120)+rax],xmm9 + movaps XMMWORD PTR[(-104)+rax],xmm10 + movaps XMMWORD PTR[(-88)+rax],xmm11 + movaps XMMWORD PTR[(-72)+rax],xmm12 + movaps XMMWORD PTR[(-56)+rax],xmm13 + movaps XMMWORD PTR[(-40)+rax],xmm14 + movaps XMMWORD PTR[(-24)+rax],xmm15 +$L$xts_enc_body:: + lea rbp,QWORD PTR[((-8))+rax] + movups xmm2,XMMWORD PTR[r9] + mov eax,DWORD PTR[240+r8] + mov r10d,DWORD PTR[240+rcx] + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[16+r8] + lea r8,QWORD PTR[32+r8] + xorps xmm2,xmm0 +$L$oop_enc1_8:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + jnz $L$oop_enc1_8 +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[rcx] + mov r11,rcx + mov eax,r10d + shl r10d,4 + mov r9,rdx + and rdx,-16 + + movups xmm1,XMMWORD PTR[16+r10*1+rcx] + + movdqa xmm8,XMMWORD PTR[$L$xts_magic] + movdqa xmm15,xmm2 + pshufd xmm9,xmm2,05fh + pxor xmm1,xmm0 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm10,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm10,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm11,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm11,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm12,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand 
xmm14,xmm8 + pxor xmm12,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm13,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm13,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm15 + psrad xmm9,31 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pxor xmm14,xmm0 + pxor xmm15,xmm9 + movaps XMMWORD PTR[96+rsp],xmm1 + + sub rdx,16*6 + jc $L$xts_enc_short + + mov eax,16+96 + lea rcx,QWORD PTR[32+r10*1+r11] + sub rax,r10 + movups xmm1,XMMWORD PTR[16+r11] + mov r10,rax + lea r8,QWORD PTR[$L$xts_magic] + jmp $L$xts_enc_grandloop + +ALIGN 32 +$L$xts_enc_grandloop:: + movdqu xmm2,XMMWORD PTR[rdi] + movdqa xmm8,xmm0 + movdqu xmm3,XMMWORD PTR[16+rdi] + pxor xmm2,xmm10 + movdqu xmm4,XMMWORD PTR[32+rdi] + pxor xmm3,xmm11 +DB 102,15,56,220,209 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm4,xmm12 +DB 102,15,56,220,217 + movdqu xmm6,XMMWORD PTR[64+rdi] + pxor xmm5,xmm13 +DB 102,15,56,220,225 + movdqu xmm7,XMMWORD PTR[80+rdi] + pxor xmm8,xmm15 + movdqa xmm9,XMMWORD PTR[96+rsp] + pxor xmm6,xmm14 +DB 102,15,56,220,233 + movups xmm0,XMMWORD PTR[32+r11] + lea rdi,QWORD PTR[96+rdi] + pxor xmm7,xmm8 + + pxor xmm10,xmm9 +DB 102,15,56,220,241 + pxor xmm11,xmm9 + movdqa XMMWORD PTR[rsp],xmm10 +DB 102,15,56,220,249 + movups xmm1,XMMWORD PTR[48+r11] + pxor xmm12,xmm9 + +DB 102,15,56,220,208 + pxor xmm13,xmm9 + movdqa XMMWORD PTR[16+rsp],xmm11 +DB 102,15,56,220,216 + pxor xmm14,xmm9 + movdqa XMMWORD PTR[32+rsp],xmm12 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + pxor xmm8,xmm9 + movdqa XMMWORD PTR[64+rsp],xmm14 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[64+r11] + movdqa XMMWORD PTR[80+rsp],xmm8 + pshufd xmm9,xmm15,05fh + jmp $L$xts_enc_loop6 +ALIGN 32 +$L$xts_enc_loop6:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 + movups xmm1,XMMWORD PTR[((-64))+rax*1+rcx] + add rax,32 + +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 
102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[((-80))+rax*1+rcx] + jnz $L$xts_enc_loop6 + + movdqa xmm8,XMMWORD PTR[r8] + movdqa xmm14,xmm9 + paddd xmm9,xmm9 +DB 102,15,56,220,209 + paddq xmm15,xmm15 + psrad xmm14,31 +DB 102,15,56,220,217 + pand xmm14,xmm8 + movups xmm10,XMMWORD PTR[r11] +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 + pxor xmm15,xmm14 + movaps xmm11,xmm10 +DB 102,15,56,220,249 + movups xmm1,XMMWORD PTR[((-64))+rcx] + + movdqa xmm14,xmm9 +DB 102,15,56,220,208 + paddd xmm9,xmm9 + pxor xmm10,xmm15 +DB 102,15,56,220,216 + psrad xmm14,31 + paddq xmm15,xmm15 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + pand xmm14,xmm8 + movaps xmm12,xmm11 +DB 102,15,56,220,240 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[((-48))+rcx] + + paddd xmm9,xmm9 +DB 102,15,56,220,209 + pxor xmm11,xmm15 + psrad xmm14,31 +DB 102,15,56,220,217 + paddq xmm15,xmm15 + pand xmm14,xmm8 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movdqa XMMWORD PTR[48+rsp],xmm13 + pxor xmm15,xmm14 +DB 102,15,56,220,241 + movaps xmm13,xmm12 + movdqa xmm14,xmm9 +DB 102,15,56,220,249 + movups xmm1,XMMWORD PTR[((-32))+rcx] + + paddd xmm9,xmm9 +DB 102,15,56,220,208 + pxor xmm12,xmm15 + psrad xmm14,31 +DB 102,15,56,220,216 + paddq xmm15,xmm15 + pand xmm14,xmm8 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 + pxor xmm15,xmm14 + movaps xmm14,xmm13 +DB 102,15,56,220,248 + + movdqa xmm0,xmm9 + paddd xmm9,xmm9 +DB 102,15,56,220,209 + pxor xmm13,xmm15 + psrad xmm0,31 +DB 102,15,56,220,217 + paddq xmm15,xmm15 + pand xmm0,xmm8 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + pxor xmm15,xmm0 + movups xmm0,XMMWORD PTR[r11] +DB 102,15,56,220,241 +DB 102,15,56,220,249 + movups xmm1,XMMWORD PTR[16+r11] + + pxor xmm14,xmm15 +DB 102,15,56,221,84,36,0 + psrad xmm9,31 + paddq xmm15,xmm15 +DB 102,15,56,221,92,36,16 +DB 102,15,56,221,100,36,32 + pand xmm9,xmm8 + mov rax,r10 +DB 102,15,56,221,108,36,48 +DB 
102,15,56,221,116,36,64 +DB 102,15,56,221,124,36,80 + pxor xmm15,xmm9 + + lea rsi,QWORD PTR[96+rsi] + movups XMMWORD PTR[(-96)+rsi],xmm2 + movups XMMWORD PTR[(-80)+rsi],xmm3 + movups XMMWORD PTR[(-64)+rsi],xmm4 + movups XMMWORD PTR[(-48)+rsi],xmm5 + movups XMMWORD PTR[(-32)+rsi],xmm6 + movups XMMWORD PTR[(-16)+rsi],xmm7 + sub rdx,16*6 + jnc $L$xts_enc_grandloop + + mov eax,16+96 + sub eax,r10d + mov rcx,r11 + shr eax,4 + +$L$xts_enc_short:: + mov r10d,eax + pxor xmm10,xmm0 + add rdx,16*6 + jz $L$xts_enc_done + + pxor xmm11,xmm0 + cmp rdx,020h + jb $L$xts_enc_one + pxor xmm12,xmm0 + je $L$xts_enc_two + + pxor xmm13,xmm0 + cmp rdx,040h + jb $L$xts_enc_three + pxor xmm14,xmm0 + je $L$xts_enc_four + + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqu xmm4,XMMWORD PTR[32+rdi] + pxor xmm2,xmm10 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm3,xmm11 + movdqu xmm6,XMMWORD PTR[64+rdi] + lea rdi,QWORD PTR[80+rdi] + pxor xmm4,xmm12 + pxor xmm5,xmm13 + pxor xmm6,xmm14 + + call _aesni_encrypt6 + + xorps xmm2,xmm10 + movdqa xmm10,xmm15 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movdqu XMMWORD PTR[rsi],xmm2 + xorps xmm5,xmm13 + movdqu XMMWORD PTR[16+rsi],xmm3 + xorps xmm6,xmm14 + movdqu XMMWORD PTR[32+rsi],xmm4 + movdqu XMMWORD PTR[48+rsi],xmm5 + movdqu XMMWORD PTR[64+rsi],xmm6 + lea rsi,QWORD PTR[80+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_one:: + movups xmm2,XMMWORD PTR[rdi] + lea rdi,QWORD PTR[16+rdi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_9:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_9 +DB 102,15,56,221,209 + xorps xmm2,xmm10 + movdqa xmm10,xmm11 + movups XMMWORD PTR[rsi],xmm2 + lea rsi,QWORD PTR[16+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_two:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + lea rdi,QWORD PTR[32+rdi] + xorps xmm2,xmm10 + xorps 
xmm3,xmm11 + + call _aesni_encrypt2 + + xorps xmm2,xmm10 + movdqa xmm10,xmm12 + xorps xmm3,xmm11 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + lea rsi,QWORD PTR[32+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_three:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + movups xmm4,XMMWORD PTR[32+rdi] + lea rdi,QWORD PTR[48+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + + call _aesni_encrypt3 + + xorps xmm2,xmm10 + movdqa xmm10,xmm13 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + lea rsi,QWORD PTR[48+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_four:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + movups xmm4,XMMWORD PTR[32+rdi] + xorps xmm2,xmm10 + movups xmm5,XMMWORD PTR[48+rdi] + lea rdi,QWORD PTR[64+rdi] + xorps xmm3,xmm11 + xorps xmm4,xmm12 + xorps xmm5,xmm13 + + call _aesni_encrypt4 + + pxor xmm2,xmm10 + movdqa xmm10,xmm14 + pxor xmm3,xmm11 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[16+rsi],xmm3 + movdqu XMMWORD PTR[32+rsi],xmm4 + movdqu XMMWORD PTR[48+rsi],xmm5 + lea rsi,QWORD PTR[64+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_done:: + and r9,15 + jz $L$xts_enc_ret + mov rdx,r9 + +$L$xts_enc_steal:: + movzx eax,BYTE PTR[rdi] + movzx ecx,BYTE PTR[((-16))+rsi] + lea rdi,QWORD PTR[1+rdi] + mov BYTE PTR[((-16))+rsi],al + mov BYTE PTR[rsi],cl + lea rsi,QWORD PTR[1+rsi] + sub rdx,1 + jnz $L$xts_enc_steal + + sub rsi,r9 + mov rcx,r11 + mov eax,r10d + + movups xmm2,XMMWORD PTR[((-16))+rsi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_10:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_10 +DB 102,15,56,221,209 + xorps xmm2,xmm10 + movups XMMWORD PTR[(-16)+rsi],xmm2 + +$L$xts_enc_ret:: + 
movaps xmm6,XMMWORD PTR[((-160))+rbp] + movaps xmm7,XMMWORD PTR[((-144))+rbp] + movaps xmm8,XMMWORD PTR[((-128))+rbp] + movaps xmm9,XMMWORD PTR[((-112))+rbp] + movaps xmm10,XMMWORD PTR[((-96))+rbp] + movaps xmm11,XMMWORD PTR[((-80))+rbp] + movaps xmm12,XMMWORD PTR[((-64))+rbp] + movaps xmm13,XMMWORD PTR[((-48))+rbp] + movaps xmm14,XMMWORD PTR[((-32))+rbp] + movaps xmm15,XMMWORD PTR[((-16))+rbp] + lea rsp,QWORD PTR[rbp] + pop rbp +$L$xts_enc_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_xts_encrypt:: +aesni_xts_encrypt ENDP +PUBLIC aesni_xts_decrypt + +ALIGN 16 +aesni_xts_decrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_xts_decrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + lea rax,QWORD PTR[rsp] + push rbp + sub rsp,272 + and rsp,-16 + movaps XMMWORD PTR[(-168)+rax],xmm6 + movaps XMMWORD PTR[(-152)+rax],xmm7 + movaps XMMWORD PTR[(-136)+rax],xmm8 + movaps XMMWORD PTR[(-120)+rax],xmm9 + movaps XMMWORD PTR[(-104)+rax],xmm10 + movaps XMMWORD PTR[(-88)+rax],xmm11 + movaps XMMWORD PTR[(-72)+rax],xmm12 + movaps XMMWORD PTR[(-56)+rax],xmm13 + movaps XMMWORD PTR[(-40)+rax],xmm14 + movaps XMMWORD PTR[(-24)+rax],xmm15 +$L$xts_dec_body:: + lea rbp,QWORD PTR[((-8))+rax] + movups xmm2,XMMWORD PTR[r9] + mov eax,DWORD PTR[240+r8] + mov r10d,DWORD PTR[240+rcx] + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[16+r8] + lea r8,QWORD PTR[32+r8] + xorps xmm2,xmm0 +$L$oop_enc1_11:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + jnz $L$oop_enc1_11 +DB 102,15,56,221,209 + xor eax,eax + test rdx,15 + setnz al + shl rax,4 + sub rdx,rax + + movups xmm0,XMMWORD PTR[rcx] + mov r11,rcx + mov eax,r10d + shl r10d,4 + mov r9,rdx + and rdx,-16 + + movups xmm1,XMMWORD PTR[16+r10*1+rcx] + + movdqa xmm8,XMMWORD PTR[$L$xts_magic] + 
movdqa xmm15,xmm2 + pshufd xmm9,xmm2,05fh + pxor xmm1,xmm0 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm10,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm10,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm11,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm11,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm12,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm12,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm13,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm13,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm15 + psrad xmm9,31 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pxor xmm14,xmm0 + pxor xmm15,xmm9 + movaps XMMWORD PTR[96+rsp],xmm1 + + sub rdx,16*6 + jc $L$xts_dec_short + + mov eax,16+96 + lea rcx,QWORD PTR[32+r10*1+r11] + sub rax,r10 + movups xmm1,XMMWORD PTR[16+r11] + mov r10,rax + lea r8,QWORD PTR[$L$xts_magic] + jmp $L$xts_dec_grandloop + +ALIGN 32 +$L$xts_dec_grandloop:: + movdqu xmm2,XMMWORD PTR[rdi] + movdqa xmm8,xmm0 + movdqu xmm3,XMMWORD PTR[16+rdi] + pxor xmm2,xmm10 + movdqu xmm4,XMMWORD PTR[32+rdi] + pxor xmm3,xmm11 +DB 102,15,56,222,209 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm4,xmm12 +DB 102,15,56,222,217 + movdqu xmm6,XMMWORD PTR[64+rdi] + pxor xmm5,xmm13 +DB 102,15,56,222,225 + movdqu xmm7,XMMWORD PTR[80+rdi] + pxor xmm8,xmm15 + movdqa xmm9,XMMWORD PTR[96+rsp] + pxor xmm6,xmm14 +DB 102,15,56,222,233 + movups xmm0,XMMWORD PTR[32+r11] + lea rdi,QWORD PTR[96+rdi] + pxor xmm7,xmm8 + + pxor xmm10,xmm9 +DB 102,15,56,222,241 + pxor xmm11,xmm9 + movdqa XMMWORD PTR[rsp],xmm10 +DB 102,15,56,222,249 + movups xmm1,XMMWORD PTR[48+r11] + pxor xmm12,xmm9 + +DB 102,15,56,222,208 + pxor xmm13,xmm9 + movdqa XMMWORD PTR[16+rsp],xmm11 +DB 102,15,56,222,216 + pxor xmm14,xmm9 + movdqa XMMWORD PTR[32+rsp],xmm12 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + pxor xmm8,xmm9 + movdqa XMMWORD PTR[64+rsp],xmm14 
+DB 102,15,56,222,240 +DB 102,15,56,222,248 + movups xmm0,XMMWORD PTR[64+r11] + movdqa XMMWORD PTR[80+rsp],xmm8 + pshufd xmm9,xmm15,05fh + jmp $L$xts_dec_loop6 +ALIGN 32 +$L$xts_dec_loop6:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 + movups xmm1,XMMWORD PTR[((-64))+rax*1+rcx] + add rax,32 + +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 + movups xmm0,XMMWORD PTR[((-80))+rax*1+rcx] + jnz $L$xts_dec_loop6 + + movdqa xmm8,XMMWORD PTR[r8] + movdqa xmm14,xmm9 + paddd xmm9,xmm9 +DB 102,15,56,222,209 + paddq xmm15,xmm15 + psrad xmm14,31 +DB 102,15,56,222,217 + pand xmm14,xmm8 + movups xmm10,XMMWORD PTR[r11] +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 + pxor xmm15,xmm14 + movaps xmm11,xmm10 +DB 102,15,56,222,249 + movups xmm1,XMMWORD PTR[((-64))+rcx] + + movdqa xmm14,xmm9 +DB 102,15,56,222,208 + paddd xmm9,xmm9 + pxor xmm10,xmm15 +DB 102,15,56,222,216 + psrad xmm14,31 + paddq xmm15,xmm15 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + pand xmm14,xmm8 + movaps xmm12,xmm11 +DB 102,15,56,222,240 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 +DB 102,15,56,222,248 + movups xmm0,XMMWORD PTR[((-48))+rcx] + + paddd xmm9,xmm9 +DB 102,15,56,222,209 + pxor xmm11,xmm15 + psrad xmm14,31 +DB 102,15,56,222,217 + paddq xmm15,xmm15 + pand xmm14,xmm8 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movdqa XMMWORD PTR[48+rsp],xmm13 + pxor xmm15,xmm14 +DB 102,15,56,222,241 + movaps xmm13,xmm12 + movdqa xmm14,xmm9 +DB 102,15,56,222,249 + movups xmm1,XMMWORD PTR[((-32))+rcx] + + paddd xmm9,xmm9 +DB 102,15,56,222,208 + pxor xmm12,xmm15 + psrad xmm14,31 +DB 102,15,56,222,216 + paddq xmm15,xmm15 + pand xmm14,xmm8 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 + pxor xmm15,xmm14 + movaps xmm14,xmm13 +DB 102,15,56,222,248 + + movdqa xmm0,xmm9 + paddd xmm9,xmm9 +DB 102,15,56,222,209 + pxor 
xmm13,xmm15 + psrad xmm0,31 +DB 102,15,56,222,217 + paddq xmm15,xmm15 + pand xmm0,xmm8 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + pxor xmm15,xmm0 + movups xmm0,XMMWORD PTR[r11] +DB 102,15,56,222,241 +DB 102,15,56,222,249 + movups xmm1,XMMWORD PTR[16+r11] + + pxor xmm14,xmm15 +DB 102,15,56,223,84,36,0 + psrad xmm9,31 + paddq xmm15,xmm15 +DB 102,15,56,223,92,36,16 +DB 102,15,56,223,100,36,32 + pand xmm9,xmm8 + mov rax,r10 +DB 102,15,56,223,108,36,48 +DB 102,15,56,223,116,36,64 +DB 102,15,56,223,124,36,80 + pxor xmm15,xmm9 + + lea rsi,QWORD PTR[96+rsi] + movups XMMWORD PTR[(-96)+rsi],xmm2 + movups XMMWORD PTR[(-80)+rsi],xmm3 + movups XMMWORD PTR[(-64)+rsi],xmm4 + movups XMMWORD PTR[(-48)+rsi],xmm5 + movups XMMWORD PTR[(-32)+rsi],xmm6 + movups XMMWORD PTR[(-16)+rsi],xmm7 + sub rdx,16*6 + jnc $L$xts_dec_grandloop + + mov eax,16+96 + sub eax,r10d + mov rcx,r11 + shr eax,4 + +$L$xts_dec_short:: + mov r10d,eax + pxor xmm10,xmm0 + pxor xmm11,xmm0 + add rdx,16*6 + jz $L$xts_dec_done + + pxor xmm12,xmm0 + cmp rdx,020h + jb $L$xts_dec_one + pxor xmm13,xmm0 + je $L$xts_dec_two + + pxor xmm14,xmm0 + cmp rdx,040h + jb $L$xts_dec_three + je $L$xts_dec_four + + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqu xmm4,XMMWORD PTR[32+rdi] + pxor xmm2,xmm10 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm3,xmm11 + movdqu xmm6,XMMWORD PTR[64+rdi] + lea rdi,QWORD PTR[80+rdi] + pxor xmm4,xmm12 + pxor xmm5,xmm13 + pxor xmm6,xmm14 + + call _aesni_decrypt6 + + xorps xmm2,xmm10 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movdqu XMMWORD PTR[rsi],xmm2 + xorps xmm5,xmm13 + movdqu XMMWORD PTR[16+rsi],xmm3 + xorps xmm6,xmm14 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm14,xmm14 + movdqu XMMWORD PTR[48+rsi],xmm5 + pcmpgtd xmm14,xmm15 + movdqu XMMWORD PTR[64+rsi],xmm6 + lea rsi,QWORD PTR[80+rsi] + pshufd xmm11,xmm14,013h + and r9,15 + jz $L$xts_dec_ret + + movdqa xmm10,xmm15 + paddq xmm15,xmm15 + pand xmm11,xmm8 + pxor xmm11,xmm15 + jmp $L$xts_dec_done2 + +ALIGN 16 +$L$xts_dec_one:: + 
movups xmm2,XMMWORD PTR[rdi] + lea rdi,QWORD PTR[16+rdi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_12:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_12 +DB 102,15,56,223,209 + xorps xmm2,xmm10 + movdqa xmm10,xmm11 + movups XMMWORD PTR[rsi],xmm2 + movdqa xmm11,xmm12 + lea rsi,QWORD PTR[16+rsi] + jmp $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_two:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + lea rdi,QWORD PTR[32+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + + call _aesni_decrypt2 + + xorps xmm2,xmm10 + movdqa xmm10,xmm12 + xorps xmm3,xmm11 + movdqa xmm11,xmm13 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + lea rsi,QWORD PTR[32+rsi] + jmp $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_three:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + movups xmm4,XMMWORD PTR[32+rdi] + lea rdi,QWORD PTR[48+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + + call _aesni_decrypt3 + + xorps xmm2,xmm10 + movdqa xmm10,xmm13 + xorps xmm3,xmm11 + movdqa xmm11,xmm14 + xorps xmm4,xmm12 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + lea rsi,QWORD PTR[48+rsi] + jmp $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_four:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + movups xmm4,XMMWORD PTR[32+rdi] + xorps xmm2,xmm10 + movups xmm5,XMMWORD PTR[48+rdi] + lea rdi,QWORD PTR[64+rdi] + xorps xmm3,xmm11 + xorps xmm4,xmm12 + xorps xmm5,xmm13 + + call _aesni_decrypt4 + + pxor xmm2,xmm10 + movdqa xmm10,xmm14 + pxor xmm3,xmm11 + movdqa xmm11,xmm15 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[16+rsi],xmm3 + movdqu XMMWORD PTR[32+rsi],xmm4 + movdqu XMMWORD PTR[48+rsi],xmm5 + lea rsi,QWORD PTR[64+rsi] + jmp $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_done:: + and r9,15 + jz 
$L$xts_dec_ret +$L$xts_dec_done2:: + mov rdx,r9 + mov rcx,r11 + mov eax,r10d + + movups xmm2,XMMWORD PTR[rdi] + xorps xmm2,xmm11 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_13:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_13 +DB 102,15,56,223,209 + xorps xmm2,xmm11 + movups XMMWORD PTR[rsi],xmm2 + +$L$xts_dec_steal:: + movzx eax,BYTE PTR[16+rdi] + movzx ecx,BYTE PTR[rsi] + lea rdi,QWORD PTR[1+rdi] + mov BYTE PTR[rsi],al + mov BYTE PTR[16+rsi],cl + lea rsi,QWORD PTR[1+rsi] + sub rdx,1 + jnz $L$xts_dec_steal + + sub rsi,r9 + mov rcx,r11 + mov eax,r10d + + movups xmm2,XMMWORD PTR[rsi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_14:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_14 +DB 102,15,56,223,209 + xorps xmm2,xmm10 + movups XMMWORD PTR[rsi],xmm2 + +$L$xts_dec_ret:: + movaps xmm6,XMMWORD PTR[((-160))+rbp] + movaps xmm7,XMMWORD PTR[((-144))+rbp] + movaps xmm8,XMMWORD PTR[((-128))+rbp] + movaps xmm9,XMMWORD PTR[((-112))+rbp] + movaps xmm10,XMMWORD PTR[((-96))+rbp] + movaps xmm11,XMMWORD PTR[((-80))+rbp] + movaps xmm12,XMMWORD PTR[((-64))+rbp] + movaps xmm13,XMMWORD PTR[((-48))+rbp] + movaps xmm14,XMMWORD PTR[((-32))+rbp] + movaps xmm15,XMMWORD PTR[((-16))+rbp] + lea rsp,QWORD PTR[rbp] + pop rbp +$L$xts_dec_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_xts_decrypt:: +aesni_xts_decrypt ENDP +PUBLIC aesni_cbc_encrypt + +ALIGN 16 +aesni_cbc_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_cbc_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + 
test rdx,rdx + jz $L$cbc_ret + + mov r10d,DWORD PTR[240+rcx] + mov r11,rcx + test r9d,r9d + jz $L$cbc_decrypt + + movups xmm2,XMMWORD PTR[r8] + mov eax,r10d + cmp rdx,16 + jb $L$cbc_enc_tail + sub rdx,16 + jmp $L$cbc_enc_loop +ALIGN 16 +$L$cbc_enc_loop:: + movups xmm3,XMMWORD PTR[rdi] + lea rdi,QWORD PTR[16+rdi] + + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm3,xmm0 + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm3 +$L$oop_enc1_15:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_15 +DB 102,15,56,221,209 + mov eax,r10d + mov rcx,r11 + movups XMMWORD PTR[rsi],xmm2 + lea rsi,QWORD PTR[16+rsi] + sub rdx,16 + jnc $L$cbc_enc_loop + add rdx,16 + jnz $L$cbc_enc_tail + movups XMMWORD PTR[r8],xmm2 + jmp $L$cbc_ret + +$L$cbc_enc_tail:: + mov rcx,rdx + xchg rsi,rdi + DD 09066A4F3h + mov ecx,16 + sub rcx,rdx + xor eax,eax + DD 09066AAF3h + lea rdi,QWORD PTR[((-16))+rdi] + mov eax,r10d + mov rsi,rdi + mov rcx,r11 + xor rdx,rdx + jmp $L$cbc_enc_loop + +ALIGN 16 +$L$cbc_decrypt:: + lea rax,QWORD PTR[rsp] + push rbp + sub rsp,176 + and rsp,-16 + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 +$L$cbc_decrypt_body:: + lea rbp,QWORD PTR[((-8))+rax] + movups xmm10,XMMWORD PTR[r8] + mov eax,r10d + cmp rdx,050h + jbe $L$cbc_dec_tail + + movups xmm0,XMMWORD PTR[rcx] + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqa xmm11,xmm2 + movdqu xmm4,XMMWORD PTR[32+rdi] + movdqa xmm12,xmm3 + movdqu xmm5,XMMWORD PTR[48+rdi] + movdqa xmm13,xmm4 + movdqu xmm6,XMMWORD PTR[64+rdi] + movdqa xmm14,xmm5 + movdqu xmm7,XMMWORD PTR[80+rdi] + movdqa xmm15,xmm6 + mov r9d,DWORD PTR[((OPENSSL_ia32cap_P+4))] + 
cmp rdx,070h + jbe $L$cbc_dec_six_or_seven + + and r9d,71303168 + sub rdx,050h + cmp r9d,4194304 + je $L$cbc_dec_loop6_enter + sub rdx,020h + lea rcx,QWORD PTR[112+rcx] + jmp $L$cbc_dec_loop8_enter +ALIGN 16 +$L$cbc_dec_loop8:: + movups XMMWORD PTR[rsi],xmm9 + lea rsi,QWORD PTR[16+rsi] +$L$cbc_dec_loop8_enter:: + movdqu xmm8,XMMWORD PTR[96+rdi] + pxor xmm2,xmm0 + movdqu xmm9,XMMWORD PTR[112+rdi] + pxor xmm3,xmm0 + movups xmm1,XMMWORD PTR[((16-112))+rcx] + pxor xmm4,xmm0 + xor r11,r11 + cmp rdx,070h + pxor xmm5,xmm0 + pxor xmm6,xmm0 + pxor xmm7,xmm0 + pxor xmm8,xmm0 + +DB 102,15,56,222,209 + pxor xmm9,xmm0 + movups xmm0,XMMWORD PTR[((32-112))+rcx] +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 + setnc r11b + shl r11,7 +DB 102,68,15,56,222,201 + add r11,rdi + movups xmm1,XMMWORD PTR[((48-112))+rcx] +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((64-112))+rcx] + nop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[((80-112))+rcx] + nop +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((96-112))+rcx] + nop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[((112-112))+rcx] + nop +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 
+ movups xmm0,XMMWORD PTR[((128-112))+rcx] + nop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[((144-112))+rcx] + cmp eax,11 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((160-112))+rcx] + jb $L$cbc_dec_done +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[((176-112))+rcx] + nop +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((192-112))+rcx] + je $L$cbc_dec_done +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[((208-112))+rcx] + nop +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((224-112))+rcx] + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_done:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 + pxor xmm10,xmm0 + pxor xmm11,xmm0 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + pxor xmm12,xmm0 + pxor xmm13,xmm0 +DB 102,15,56,222,241 +DB 102,15,56,222,249 + pxor xmm14,xmm0 + pxor xmm15,xmm0 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movdqu xmm1,XMMWORD PTR[80+rdi] + +DB 102,65,15,56,223,210 + movdqu xmm10,XMMWORD PTR[96+rdi] + pxor xmm1,xmm0 +DB 102,65,15,56,223,219 + pxor xmm10,xmm0 + movdqu xmm0,XMMWORD PTR[112+rdi] 
+DB 102,65,15,56,223,228 + lea rdi,QWORD PTR[128+rdi] + movdqu xmm11,XMMWORD PTR[r11] +DB 102,65,15,56,223,237 +DB 102,65,15,56,223,246 + movdqu xmm12,XMMWORD PTR[16+r11] + movdqu xmm13,XMMWORD PTR[32+r11] +DB 102,65,15,56,223,255 +DB 102,68,15,56,223,193 + movdqu xmm14,XMMWORD PTR[48+r11] + movdqu xmm15,XMMWORD PTR[64+r11] +DB 102,69,15,56,223,202 + movdqa xmm10,xmm0 + movdqu xmm1,XMMWORD PTR[80+r11] + movups xmm0,XMMWORD PTR[((-112))+rcx] + + movups XMMWORD PTR[rsi],xmm2 + movdqa xmm2,xmm11 + movups XMMWORD PTR[16+rsi],xmm3 + movdqa xmm3,xmm12 + movups XMMWORD PTR[32+rsi],xmm4 + movdqa xmm4,xmm13 + movups XMMWORD PTR[48+rsi],xmm5 + movdqa xmm5,xmm14 + movups XMMWORD PTR[64+rsi],xmm6 + movdqa xmm6,xmm15 + movups XMMWORD PTR[80+rsi],xmm7 + movdqa xmm7,xmm1 + movups XMMWORD PTR[96+rsi],xmm8 + lea rsi,QWORD PTR[112+rsi] + + sub rdx,080h + ja $L$cbc_dec_loop8 + + movaps xmm2,xmm9 + lea rcx,QWORD PTR[((-112))+rcx] + add rdx,070h + jle $L$cbc_dec_tail_collected + movups XMMWORD PTR[rsi],xmm9 + lea rsi,QWORD PTR[16+rsi] + cmp rdx,050h + jbe $L$cbc_dec_tail + + movaps xmm2,xmm11 +$L$cbc_dec_six_or_seven:: + cmp rdx,060h + ja $L$cbc_dec_seven + + movaps xmm8,xmm7 + call _aesni_decrypt6 + pxor xmm2,xmm10 + movaps xmm10,xmm8 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm6,xmm14 + movdqu XMMWORD PTR[48+rsi],xmm5 + pxor xmm7,xmm15 + movdqu XMMWORD PTR[64+rsi],xmm6 + lea rsi,QWORD PTR[80+rsi] + movdqa xmm2,xmm7 + jmp $L$cbc_dec_tail_collected + +ALIGN 16 +$L$cbc_dec_seven:: + movups xmm8,XMMWORD PTR[96+rdi] + xorps xmm9,xmm9 + call _aesni_decrypt8 + movups xmm9,XMMWORD PTR[80+rdi] + pxor xmm2,xmm10 + movups xmm10,XMMWORD PTR[96+rdi] + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm6,xmm14 + movdqu XMMWORD PTR[48+rsi],xmm5 + pxor xmm7,xmm15 + movdqu 
XMMWORD PTR[64+rsi],xmm6 + pxor xmm8,xmm9 + movdqu XMMWORD PTR[80+rsi],xmm7 + lea rsi,QWORD PTR[96+rsi] + movdqa xmm2,xmm8 + jmp $L$cbc_dec_tail_collected + +ALIGN 16 +$L$cbc_dec_loop6:: + movups XMMWORD PTR[rsi],xmm7 + lea rsi,QWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqa xmm11,xmm2 + movdqu xmm4,XMMWORD PTR[32+rdi] + movdqa xmm12,xmm3 + movdqu xmm5,XMMWORD PTR[48+rdi] + movdqa xmm13,xmm4 + movdqu xmm6,XMMWORD PTR[64+rdi] + movdqa xmm14,xmm5 + movdqu xmm7,XMMWORD PTR[80+rdi] + movdqa xmm15,xmm6 +$L$cbc_dec_loop6_enter:: + lea rdi,QWORD PTR[96+rdi] + movdqa xmm8,xmm7 + + call _aesni_decrypt6 + + pxor xmm2,xmm10 + movdqa xmm10,xmm8 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm6,xmm14 + mov rcx,r11 + movdqu XMMWORD PTR[48+rsi],xmm5 + pxor xmm7,xmm15 + mov eax,r10d + movdqu XMMWORD PTR[64+rsi],xmm6 + lea rsi,QWORD PTR[80+rsi] + sub rdx,060h + ja $L$cbc_dec_loop6 + + movdqa xmm2,xmm7 + add rdx,050h + jle $L$cbc_dec_tail_collected + movups XMMWORD PTR[rsi],xmm7 + lea rsi,QWORD PTR[16+rsi] + +$L$cbc_dec_tail:: + movups xmm2,XMMWORD PTR[rdi] + sub rdx,010h + jbe $L$cbc_dec_one + + movups xmm3,XMMWORD PTR[16+rdi] + movaps xmm11,xmm2 + sub rdx,010h + jbe $L$cbc_dec_two + + movups xmm4,XMMWORD PTR[32+rdi] + movaps xmm12,xmm3 + sub rdx,010h + jbe $L$cbc_dec_three + + movups xmm5,XMMWORD PTR[48+rdi] + movaps xmm13,xmm4 + sub rdx,010h + jbe $L$cbc_dec_four + + movups xmm6,XMMWORD PTR[64+rdi] + movaps xmm14,xmm5 + movaps xmm15,xmm6 + xorps xmm7,xmm7 + call _aesni_decrypt6 + pxor xmm2,xmm10 + movaps xmm10,xmm15 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm6,xmm14 + movdqu XMMWORD PTR[48+rsi],xmm5 + lea rsi,QWORD PTR[64+rsi] + movdqa xmm2,xmm6 + sub rdx,010h + jmp $L$cbc_dec_tail_collected + +ALIGN 16 
+$L$cbc_dec_one:: + movaps xmm11,xmm2 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_16:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_16 +DB 102,15,56,223,209 + xorps xmm2,xmm10 + movaps xmm10,xmm11 + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_two:: + movaps xmm12,xmm3 + call _aesni_decrypt2 + pxor xmm2,xmm10 + movaps xmm10,xmm12 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + movdqa xmm2,xmm3 + lea rsi,QWORD PTR[16+rsi] + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_three:: + movaps xmm13,xmm4 + call _aesni_decrypt3 + pxor xmm2,xmm10 + movaps xmm10,xmm13 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + movdqa xmm2,xmm4 + lea rsi,QWORD PTR[32+rsi] + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_four:: + movaps xmm14,xmm5 + call _aesni_decrypt4 + pxor xmm2,xmm10 + movaps xmm10,xmm14 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + movdqa xmm2,xmm5 + lea rsi,QWORD PTR[48+rsi] + jmp $L$cbc_dec_tail_collected + +ALIGN 16 +$L$cbc_dec_tail_collected:: + movups XMMWORD PTR[r8],xmm10 + and rdx,15 + jnz $L$cbc_dec_tail_partial + movups XMMWORD PTR[rsi],xmm2 + jmp $L$cbc_dec_ret +ALIGN 16 +$L$cbc_dec_tail_partial:: + movaps XMMWORD PTR[rsp],xmm2 + mov rcx,16 + mov rdi,rsi + sub rcx,rdx + lea rsi,QWORD PTR[rsp] + DD 09066A4F3h + +$L$cbc_dec_ret:: + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[rbp] + pop rbp +$L$cbc_ret:: + mov rdi,QWORD 
PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_cbc_encrypt:: +aesni_cbc_encrypt ENDP +PUBLIC aesni_set_decrypt_key + +ALIGN 16 +aesni_set_decrypt_key PROC PUBLIC +DB 048h,083h,0ECh,008h + call __aesni_set_encrypt_key + shl edx,4 + test eax,eax + jnz $L$dec_key_ret + lea rcx,QWORD PTR[16+rdx*1+r8] + + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[rcx] + movups XMMWORD PTR[rcx],xmm0 + movups XMMWORD PTR[r8],xmm1 + lea r8,QWORD PTR[16+r8] + lea rcx,QWORD PTR[((-16))+rcx] + +$L$dec_key_inverse:: + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[rcx] +DB 102,15,56,219,192 +DB 102,15,56,219,201 + lea r8,QWORD PTR[16+r8] + lea rcx,QWORD PTR[((-16))+rcx] + movups XMMWORD PTR[16+rcx],xmm0 + movups XMMWORD PTR[(-16)+r8],xmm1 + cmp rcx,r8 + ja $L$dec_key_inverse + + movups xmm0,XMMWORD PTR[r8] +DB 102,15,56,219,192 + movups XMMWORD PTR[rcx],xmm0 +$L$dec_key_ret:: + add rsp,8 + DB 0F3h,0C3h ;repret +$L$SEH_end_set_decrypt_key:: +aesni_set_decrypt_key ENDP +PUBLIC aesni_set_encrypt_key + +ALIGN 16 +aesni_set_encrypt_key PROC PUBLIC +__aesni_set_encrypt_key:: +DB 048h,083h,0ECh,008h + mov rax,-1 + test rcx,rcx + jz $L$enc_key_ret + test r8,r8 + jz $L$enc_key_ret + + movups xmm0,XMMWORD PTR[rcx] + xorps xmm4,xmm4 + lea rax,QWORD PTR[16+r8] + cmp edx,256 + je $L$14rounds + cmp edx,192 + je $L$12rounds + cmp edx,128 + jne $L$bad_keybits + +$L$10rounds:: + mov edx,9 + movups XMMWORD PTR[r8],xmm0 +DB 102,15,58,223,200,1 + call $L$key_expansion_128_cold +DB 102,15,58,223,200,2 + call $L$key_expansion_128 +DB 102,15,58,223,200,4 + call $L$key_expansion_128 +DB 102,15,58,223,200,8 + call $L$key_expansion_128 +DB 102,15,58,223,200,16 + call $L$key_expansion_128 +DB 102,15,58,223,200,32 + call $L$key_expansion_128 +DB 102,15,58,223,200,64 + call $L$key_expansion_128 +DB 102,15,58,223,200,128 + call $L$key_expansion_128 +DB 102,15,58,223,200,27 + call $L$key_expansion_128 +DB 102,15,58,223,200,54 + call $L$key_expansion_128 + 
movups XMMWORD PTR[rax],xmm0 + mov DWORD PTR[80+rax],edx + xor eax,eax + jmp $L$enc_key_ret + +ALIGN 16 +$L$12rounds:: + movq xmm2,QWORD PTR[16+rcx] + mov edx,11 + movups XMMWORD PTR[r8],xmm0 +DB 102,15,58,223,202,1 + call $L$key_expansion_192a_cold +DB 102,15,58,223,202,2 + call $L$key_expansion_192b +DB 102,15,58,223,202,4 + call $L$key_expansion_192a +DB 102,15,58,223,202,8 + call $L$key_expansion_192b +DB 102,15,58,223,202,16 + call $L$key_expansion_192a +DB 102,15,58,223,202,32 + call $L$key_expansion_192b +DB 102,15,58,223,202,64 + call $L$key_expansion_192a +DB 102,15,58,223,202,128 + call $L$key_expansion_192b + movups XMMWORD PTR[rax],xmm0 + mov DWORD PTR[48+rax],edx + xor rax,rax + jmp $L$enc_key_ret + +ALIGN 16 +$L$14rounds:: + movups xmm2,XMMWORD PTR[16+rcx] + mov edx,13 + lea rax,QWORD PTR[16+rax] + movups XMMWORD PTR[r8],xmm0 + movups XMMWORD PTR[16+r8],xmm2 +DB 102,15,58,223,202,1 + call $L$key_expansion_256a_cold +DB 102,15,58,223,200,1 + call $L$key_expansion_256b +DB 102,15,58,223,202,2 + call $L$key_expansion_256a +DB 102,15,58,223,200,2 + call $L$key_expansion_256b +DB 102,15,58,223,202,4 + call $L$key_expansion_256a +DB 102,15,58,223,200,4 + call $L$key_expansion_256b +DB 102,15,58,223,202,8 + call $L$key_expansion_256a +DB 102,15,58,223,200,8 + call $L$key_expansion_256b +DB 102,15,58,223,202,16 + call $L$key_expansion_256a +DB 102,15,58,223,200,16 + call $L$key_expansion_256b +DB 102,15,58,223,202,32 + call $L$key_expansion_256a +DB 102,15,58,223,200,32 + call $L$key_expansion_256b +DB 102,15,58,223,202,64 + call $L$key_expansion_256a + movups XMMWORD PTR[rax],xmm0 + mov DWORD PTR[16+rax],edx + xor rax,rax + jmp $L$enc_key_ret + +ALIGN 16 +$L$bad_keybits:: + mov rax,-2 +$L$enc_key_ret:: + add rsp,8 + DB 0F3h,0C3h ;repret +$L$SEH_end_set_encrypt_key:: + +ALIGN 16 +$L$key_expansion_128:: + movups XMMWORD PTR[rax],xmm0 + lea rax,QWORD PTR[16+rax] +$L$key_expansion_128_cold:: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps 
xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + DB 0F3h,0C3h ;repret + +ALIGN 16 +$L$key_expansion_192a:: + movups XMMWORD PTR[rax],xmm0 + lea rax,QWORD PTR[16+rax] +$L$key_expansion_192a_cold:: + movaps xmm5,xmm2 +$L$key_expansion_192b_warm:: + shufps xmm4,xmm0,16 + movdqa xmm3,xmm2 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + pslldq xmm3,4 + xorps xmm0,xmm4 + pshufd xmm1,xmm1,85 + pxor xmm2,xmm3 + pxor xmm0,xmm1 + pshufd xmm3,xmm0,255 + pxor xmm2,xmm3 + DB 0F3h,0C3h ;repret + +ALIGN 16 +$L$key_expansion_192b:: + movaps xmm3,xmm0 + shufps xmm5,xmm0,68 + movups XMMWORD PTR[rax],xmm5 + shufps xmm3,xmm2,78 + movups XMMWORD PTR[16+rax],xmm3 + lea rax,QWORD PTR[32+rax] + jmp $L$key_expansion_192b_warm + +ALIGN 16 +$L$key_expansion_256a:: + movups XMMWORD PTR[rax],xmm2 + lea rax,QWORD PTR[16+rax] +$L$key_expansion_256a_cold:: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + DB 0F3h,0C3h ;repret + +ALIGN 16 +$L$key_expansion_256b:: + movups XMMWORD PTR[rax],xmm0 + lea rax,QWORD PTR[16+rax] + + shufps xmm4,xmm2,16 + xorps xmm2,xmm4 + shufps xmm4,xmm2,140 + xorps xmm2,xmm4 + shufps xmm1,xmm1,170 + xorps xmm2,xmm1 + DB 0F3h,0C3h ;repret +aesni_set_encrypt_key ENDP + +ALIGN 64 +$L$bswap_mask:: +DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +$L$increment32:: + DD 6,6,6,0 +$L$increment64:: + DD 1,0,0,0 +$L$xts_magic:: + DD 087h,0,1,0 +$L$increment1:: +DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 + +DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +DB 115,108,46,111,114,103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +ecb_ccm64_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + 
mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,8 + DD 0a548f3fch + lea rax,QWORD PTR[88+rax] + + jmp $L$common_seh_tail +ecb_ccm64_se_handler ENDP + + +ALIGN 16 +ctr_xts_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + mov rax,QWORD PTR[160+r8] + lea rsi,QWORD PTR[((-160))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + + jmp $L$common_rbp_tail +ctr_xts_se_handler ENDP + +ALIGN 16 +cbc_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[152+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$cbc_decrypt] + cmp rbx,r10 + jb $L$common_seh_tail + + lea r10,QWORD PTR[$L$cbc_decrypt_body] + cmp rbx,r10 + jb $L$restore_cbc_rax + + lea r10,QWORD PTR[$L$cbc_ret] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[16+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + +$L$common_rbp_tail:: + mov rax,QWORD PTR[160+r8] + mov rbp,QWORD PTR[rax] + lea rax,QWORD PTR[8+rax] + mov QWORD PTR[160+r8],rbp + jmp $L$common_seh_tail + +$L$restore_cbc_rax:: + mov rax,QWORD PTR[120+r8] + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + 
mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +cbc_se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_aesni_ecb_encrypt + DD imagerel $L$SEH_end_aesni_ecb_encrypt + DD imagerel $L$SEH_info_ecb + + DD imagerel $L$SEH_begin_aesni_ccm64_encrypt_blocks + DD imagerel $L$SEH_end_aesni_ccm64_encrypt_blocks + DD imagerel $L$SEH_info_ccm64_enc + + DD imagerel $L$SEH_begin_aesni_ccm64_decrypt_blocks + DD imagerel $L$SEH_end_aesni_ccm64_decrypt_blocks + DD imagerel $L$SEH_info_ccm64_dec + + DD imagerel $L$SEH_begin_aesni_ctr32_encrypt_blocks + DD imagerel $L$SEH_end_aesni_ctr32_encrypt_blocks + DD imagerel $L$SEH_info_ctr32 + + DD imagerel $L$SEH_begin_aesni_xts_encrypt + DD imagerel $L$SEH_end_aesni_xts_encrypt + DD imagerel $L$SEH_info_xts_enc + + DD imagerel $L$SEH_begin_aesni_xts_decrypt + DD imagerel $L$SEH_end_aesni_xts_decrypt + DD imagerel $L$SEH_info_xts_dec + DD imagerel $L$SEH_begin_aesni_cbc_encrypt + DD imagerel $L$SEH_end_aesni_cbc_encrypt + DD imagerel $L$SEH_info_cbc + + DD imagerel aesni_set_decrypt_key + DD imagerel $L$SEH_end_set_decrypt_key + DD imagerel $L$SEH_info_key + + DD imagerel aesni_set_encrypt_key + DD imagerel $L$SEH_end_set_encrypt_key + DD imagerel $L$SEH_info_key +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_ecb:: +DB 9,0,0,0 + DD imagerel ecb_ccm64_se_handler + DD imagerel $L$ecb_enc_body,imagerel $L$ecb_enc_ret +$L$SEH_info_ccm64_enc:: +DB 9,0,0,0 + DD imagerel ecb_ccm64_se_handler + DD imagerel 
$L$ccm64_enc_body,imagerel $L$ccm64_enc_ret +$L$SEH_info_ccm64_dec:: +DB 9,0,0,0 + DD imagerel ecb_ccm64_se_handler + DD imagerel $L$ccm64_dec_body,imagerel $L$ccm64_dec_ret +$L$SEH_info_ctr32:: +DB 9,0,0,0 + DD imagerel ctr_xts_se_handler + DD imagerel $L$ctr32_body,imagerel $L$ctr32_epilogue +$L$SEH_info_xts_enc:: +DB 9,0,0,0 + DD imagerel ctr_xts_se_handler + DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue +$L$SEH_info_xts_dec:: +DB 9,0,0,0 + DD imagerel ctr_xts_se_handler + DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue +$L$SEH_info_cbc:: +DB 9,0,0,0 + DD imagerel cbc_se_handler +$L$SEH_info_key:: +DB 001h,004h,001h,000h +DB 004h,002h,000h,000h + +.xdata ENDS +END diff --git a/win-x86_64/crypto/aes/bsaes-x86_64.asm b/win-x86_64/crypto/aes/bsaes-x86_64.asm new file mode 100644 index 0000000..3346a7e --- /dev/null +++ b/win-x86_64/crypto/aes/bsaes-x86_64.asm @@ -0,0 +1,2734 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN asm_AES_encrypt:NEAR +EXTERN asm_AES_decrypt:NEAR + + +ALIGN 64 +_bsaes_encrypt8 PROC PRIVATE + lea r11,QWORD PTR[$L$BS0] + + movdqa xmm8,XMMWORD PTR[rax] + lea rax,QWORD PTR[16+rax] + movdqa xmm7,XMMWORD PTR[80+r11] + pxor xmm15,xmm8 + pxor xmm0,xmm8 + pxor xmm1,xmm8 + pxor xmm2,xmm8 +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 + pxor xmm3,xmm8 + pxor xmm4,xmm8 +DB 102,15,56,0,207 +DB 102,15,56,0,215 + pxor xmm5,xmm8 + pxor xmm6,xmm8 +DB 102,15,56,0,223 +DB 102,15,56,0,231 +DB 102,15,56,0,239 +DB 102,15,56,0,247 +_bsaes_encrypt8_bitslice:: + movdqa xmm7,XMMWORD PTR[r11] + movdqa xmm8,XMMWORD PTR[16+r11] + movdqa xmm9,xmm5 + psrlq xmm5,1 + movdqa xmm10,xmm3 + psrlq xmm3,1 + pxor xmm5,xmm6 + pxor xmm3,xmm4 + pand xmm5,xmm7 + pand xmm3,xmm7 + pxor xmm6,xmm5 + psllq xmm5,1 + pxor xmm4,xmm3 + psllq xmm3,1 + pxor xmm5,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm1 + psrlq xmm1,1 + movdqa xmm10,xmm15 + psrlq xmm15,1 + pxor xmm1,xmm2 + pxor xmm15,xmm0 + pand xmm1,xmm7 + pand xmm15,xmm7 + pxor xmm2,xmm1 + psllq xmm1,1 + pxor 
xmm0,xmm15 + psllq xmm15,1 + pxor xmm1,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[32+r11] + movdqa xmm9,xmm4 + psrlq xmm4,2 + movdqa xmm10,xmm3 + psrlq xmm3,2 + pxor xmm4,xmm6 + pxor xmm3,xmm5 + pand xmm4,xmm8 + pand xmm3,xmm8 + pxor xmm6,xmm4 + psllq xmm4,2 + pxor xmm5,xmm3 + psllq xmm3,2 + pxor xmm4,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,2 + movdqa xmm10,xmm15 + psrlq xmm15,2 + pxor xmm0,xmm2 + pxor xmm15,xmm1 + pand xmm0,xmm8 + pand xmm15,xmm8 + pxor xmm2,xmm0 + psllq xmm0,2 + pxor xmm1,xmm15 + psllq xmm15,2 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm9,xmm2 + psrlq xmm2,4 + movdqa xmm10,xmm1 + psrlq xmm1,4 + pxor xmm2,xmm6 + pxor xmm1,xmm5 + pand xmm2,xmm7 + pand xmm1,xmm7 + pxor xmm6,xmm2 + psllq xmm2,4 + pxor xmm5,xmm1 + psllq xmm1,4 + pxor xmm2,xmm9 + pxor xmm1,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,4 + movdqa xmm10,xmm15 + psrlq xmm15,4 + pxor xmm0,xmm4 + pxor xmm15,xmm3 + pand xmm0,xmm7 + pand xmm15,xmm7 + pxor xmm4,xmm0 + psllq xmm0,4 + pxor xmm3,xmm15 + psllq xmm15,4 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + dec r10d + jmp $L$enc_sbox +ALIGN 16 +$L$enc_loop:: + pxor xmm15,XMMWORD PTR[rax] + pxor xmm0,XMMWORD PTR[16+rax] + pxor xmm1,XMMWORD PTR[32+rax] + pxor xmm2,XMMWORD PTR[48+rax] +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 + pxor xmm3,XMMWORD PTR[64+rax] + pxor xmm4,XMMWORD PTR[80+rax] +DB 102,15,56,0,207 +DB 102,15,56,0,215 + pxor xmm5,XMMWORD PTR[96+rax] + pxor xmm6,XMMWORD PTR[112+rax] +DB 102,15,56,0,223 +DB 102,15,56,0,231 +DB 102,15,56,0,239 +DB 102,15,56,0,247 + lea rax,QWORD PTR[128+rax] +$L$enc_sbox:: + pxor xmm4,xmm5 + pxor xmm1,xmm0 + pxor xmm2,xmm15 + pxor xmm5,xmm1 + pxor xmm4,xmm15 + + pxor xmm5,xmm2 + pxor xmm2,xmm6 + pxor xmm6,xmm4 + pxor xmm2,xmm3 + pxor xmm3,xmm4 + pxor xmm2,xmm0 + + pxor xmm1,xmm6 + pxor xmm0,xmm4 + movdqa xmm10,xmm6 + movdqa xmm9,xmm0 + movdqa xmm8,xmm4 + movdqa xmm12,xmm1 + movdqa xmm11,xmm5 + + pxor xmm10,xmm3 + pxor xmm9,xmm1 + pxor xmm8,xmm2 + movdqa xmm13,xmm10 + pxor xmm12,xmm3 + movdqa 
xmm7,xmm9 + pxor xmm11,xmm15 + movdqa xmm14,xmm10 + + por xmm9,xmm8 + por xmm10,xmm11 + pxor xmm14,xmm7 + pand xmm13,xmm11 + pxor xmm11,xmm8 + pand xmm7,xmm8 + pand xmm14,xmm11 + movdqa xmm11,xmm2 + pxor xmm11,xmm15 + pand xmm12,xmm11 + pxor xmm10,xmm12 + pxor xmm9,xmm12 + movdqa xmm12,xmm6 + movdqa xmm11,xmm4 + pxor xmm12,xmm0 + pxor xmm11,xmm5 + movdqa xmm8,xmm12 + pand xmm12,xmm11 + por xmm8,xmm11 + pxor xmm7,xmm12 + pxor xmm10,xmm14 + pxor xmm9,xmm13 + pxor xmm8,xmm14 + movdqa xmm11,xmm1 + pxor xmm7,xmm13 + movdqa xmm12,xmm3 + pxor xmm8,xmm13 + movdqa xmm13,xmm0 + pand xmm11,xmm2 + movdqa xmm14,xmm6 + pand xmm12,xmm15 + pand xmm13,xmm4 + por xmm14,xmm5 + pxor xmm10,xmm11 + pxor xmm9,xmm12 + pxor xmm8,xmm13 + pxor xmm7,xmm14 + + + + + + movdqa xmm11,xmm10 + pand xmm10,xmm8 + pxor xmm11,xmm9 + + movdqa xmm13,xmm7 + movdqa xmm14,xmm11 + pxor xmm13,xmm10 + pand xmm14,xmm13 + + movdqa xmm12,xmm8 + pxor xmm14,xmm9 + pxor xmm12,xmm7 + + pxor xmm10,xmm9 + + pand xmm12,xmm10 + + movdqa xmm9,xmm13 + pxor xmm12,xmm7 + + pxor xmm9,xmm12 + pxor xmm8,xmm12 + + pand xmm9,xmm7 + + pxor xmm13,xmm9 + pxor xmm8,xmm9 + + pand xmm13,xmm14 + + pxor xmm13,xmm11 + movdqa xmm11,xmm5 + movdqa xmm7,xmm4 + movdqa xmm9,xmm14 + pxor xmm9,xmm13 + pand xmm9,xmm5 + pxor xmm5,xmm4 + pand xmm4,xmm14 + pand xmm5,xmm13 + pxor xmm5,xmm4 + pxor xmm4,xmm9 + pxor xmm11,xmm15 + pxor xmm7,xmm2 + pxor xmm14,xmm12 + pxor xmm13,xmm8 + movdqa xmm10,xmm14 + movdqa xmm9,xmm12 + pxor xmm10,xmm13 + pxor xmm9,xmm8 + pand xmm10,xmm11 + pand xmm9,xmm15 + pxor xmm11,xmm7 + pxor xmm15,xmm2 + pand xmm7,xmm14 + pand xmm2,xmm12 + pand xmm11,xmm13 + pand xmm15,xmm8 + pxor xmm7,xmm11 + pxor xmm15,xmm2 + pxor xmm11,xmm10 + pxor xmm2,xmm9 + pxor xmm5,xmm11 + pxor xmm15,xmm11 + pxor xmm4,xmm7 + pxor xmm2,xmm7 + + movdqa xmm11,xmm6 + movdqa xmm7,xmm0 + pxor xmm11,xmm3 + pxor xmm7,xmm1 + movdqa xmm10,xmm14 + movdqa xmm9,xmm12 + pxor xmm10,xmm13 + pxor xmm9,xmm8 + pand xmm10,xmm11 + pand xmm9,xmm3 + pxor xmm11,xmm7 + pxor 
xmm3,xmm1 + pand xmm7,xmm14 + pand xmm1,xmm12 + pand xmm11,xmm13 + pand xmm3,xmm8 + pxor xmm7,xmm11 + pxor xmm3,xmm1 + pxor xmm11,xmm10 + pxor xmm1,xmm9 + pxor xmm14,xmm12 + pxor xmm13,xmm8 + movdqa xmm10,xmm14 + pxor xmm10,xmm13 + pand xmm10,xmm6 + pxor xmm6,xmm0 + pand xmm0,xmm14 + pand xmm6,xmm13 + pxor xmm6,xmm0 + pxor xmm0,xmm10 + pxor xmm6,xmm11 + pxor xmm3,xmm11 + pxor xmm0,xmm7 + pxor xmm1,xmm7 + pxor xmm6,xmm15 + pxor xmm0,xmm5 + pxor xmm3,xmm6 + pxor xmm5,xmm15 + pxor xmm15,xmm0 + + pxor xmm0,xmm4 + pxor xmm4,xmm1 + pxor xmm1,xmm2 + pxor xmm2,xmm4 + pxor xmm3,xmm4 + + pxor xmm5,xmm2 + dec r10d + jl $L$enc_done + pshufd xmm7,xmm15,093h + pshufd xmm8,xmm0,093h + pxor xmm15,xmm7 + pshufd xmm9,xmm3,093h + pxor xmm0,xmm8 + pshufd xmm10,xmm5,093h + pxor xmm3,xmm9 + pshufd xmm11,xmm2,093h + pxor xmm5,xmm10 + pshufd xmm12,xmm6,093h + pxor xmm2,xmm11 + pshufd xmm13,xmm1,093h + pxor xmm6,xmm12 + pshufd xmm14,xmm4,093h + pxor xmm1,xmm13 + pxor xmm4,xmm14 + + pxor xmm8,xmm15 + pxor xmm7,xmm4 + pxor xmm8,xmm4 + pshufd xmm15,xmm15,04Eh + pxor xmm9,xmm0 + pshufd xmm0,xmm0,04Eh + pxor xmm12,xmm2 + pxor xmm15,xmm7 + pxor xmm13,xmm6 + pxor xmm0,xmm8 + pxor xmm11,xmm5 + pshufd xmm7,xmm2,04Eh + pxor xmm14,xmm1 + pshufd xmm8,xmm6,04Eh + pxor xmm10,xmm3 + pshufd xmm2,xmm5,04Eh + pxor xmm10,xmm4 + pshufd xmm6,xmm4,04Eh + pxor xmm11,xmm4 + pshufd xmm5,xmm1,04Eh + pxor xmm7,xmm11 + pshufd xmm1,xmm3,04Eh + pxor xmm8,xmm12 + pxor xmm2,xmm10 + pxor xmm6,xmm14 + pxor xmm5,xmm13 + movdqa xmm3,xmm7 + pxor xmm1,xmm9 + movdqa xmm4,xmm8 + movdqa xmm7,XMMWORD PTR[48+r11] + jnz $L$enc_loop + movdqa xmm7,XMMWORD PTR[64+r11] + jmp $L$enc_loop +ALIGN 16 +$L$enc_done:: + movdqa xmm7,XMMWORD PTR[r11] + movdqa xmm8,XMMWORD PTR[16+r11] + movdqa xmm9,xmm1 + psrlq xmm1,1 + movdqa xmm10,xmm2 + psrlq xmm2,1 + pxor xmm1,xmm4 + pxor xmm2,xmm6 + pand xmm1,xmm7 + pand xmm2,xmm7 + pxor xmm4,xmm1 + psllq xmm1,1 + pxor xmm6,xmm2 + psllq xmm2,1 + pxor xmm1,xmm9 + pxor xmm2,xmm10 + movdqa xmm9,xmm3 + psrlq 
xmm3,1 + movdqa xmm10,xmm15 + psrlq xmm15,1 + pxor xmm3,xmm5 + pxor xmm15,xmm0 + pand xmm3,xmm7 + pand xmm15,xmm7 + pxor xmm5,xmm3 + psllq xmm3,1 + pxor xmm0,xmm15 + psllq xmm15,1 + pxor xmm3,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[32+r11] + movdqa xmm9,xmm6 + psrlq xmm6,2 + movdqa xmm10,xmm2 + psrlq xmm2,2 + pxor xmm6,xmm4 + pxor xmm2,xmm1 + pand xmm6,xmm8 + pand xmm2,xmm8 + pxor xmm4,xmm6 + psllq xmm6,2 + pxor xmm1,xmm2 + psllq xmm2,2 + pxor xmm6,xmm9 + pxor xmm2,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,2 + movdqa xmm10,xmm15 + psrlq xmm15,2 + pxor xmm0,xmm5 + pxor xmm15,xmm3 + pand xmm0,xmm8 + pand xmm15,xmm8 + pxor xmm5,xmm0 + psllq xmm0,2 + pxor xmm3,xmm15 + psllq xmm15,2 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm9,xmm5 + psrlq xmm5,4 + movdqa xmm10,xmm3 + psrlq xmm3,4 + pxor xmm5,xmm4 + pxor xmm3,xmm1 + pand xmm5,xmm7 + pand xmm3,xmm7 + pxor xmm4,xmm5 + psllq xmm5,4 + pxor xmm1,xmm3 + psllq xmm3,4 + pxor xmm5,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,4 + movdqa xmm10,xmm15 + psrlq xmm15,4 + pxor xmm0,xmm6 + pxor xmm15,xmm2 + pand xmm0,xmm7 + pand xmm15,xmm7 + pxor xmm6,xmm0 + psllq xmm0,4 + pxor xmm2,xmm15 + psllq xmm15,4 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[rax] + pxor xmm3,xmm7 + pxor xmm5,xmm7 + pxor xmm2,xmm7 + pxor xmm6,xmm7 + pxor xmm1,xmm7 + pxor xmm4,xmm7 + pxor xmm15,xmm7 + pxor xmm0,xmm7 + DB 0F3h,0C3h ;repret +_bsaes_encrypt8 ENDP + + +ALIGN 64 +_bsaes_decrypt8 PROC PRIVATE + lea r11,QWORD PTR[$L$BS0] + + movdqa xmm8,XMMWORD PTR[rax] + lea rax,QWORD PTR[16+rax] + movdqa xmm7,XMMWORD PTR[((-48))+r11] + pxor xmm15,xmm8 + pxor xmm0,xmm8 + pxor xmm1,xmm8 + pxor xmm2,xmm8 +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 + pxor xmm3,xmm8 + pxor xmm4,xmm8 +DB 102,15,56,0,207 +DB 102,15,56,0,215 + pxor xmm5,xmm8 + pxor xmm6,xmm8 +DB 102,15,56,0,223 +DB 102,15,56,0,231 +DB 102,15,56,0,239 +DB 102,15,56,0,247 + movdqa xmm7,XMMWORD PTR[r11] + movdqa xmm8,XMMWORD PTR[16+r11] + movdqa xmm9,xmm5 + psrlq xmm5,1 + 
movdqa xmm10,xmm3 + psrlq xmm3,1 + pxor xmm5,xmm6 + pxor xmm3,xmm4 + pand xmm5,xmm7 + pand xmm3,xmm7 + pxor xmm6,xmm5 + psllq xmm5,1 + pxor xmm4,xmm3 + psllq xmm3,1 + pxor xmm5,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm1 + psrlq xmm1,1 + movdqa xmm10,xmm15 + psrlq xmm15,1 + pxor xmm1,xmm2 + pxor xmm15,xmm0 + pand xmm1,xmm7 + pand xmm15,xmm7 + pxor xmm2,xmm1 + psllq xmm1,1 + pxor xmm0,xmm15 + psllq xmm15,1 + pxor xmm1,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[32+r11] + movdqa xmm9,xmm4 + psrlq xmm4,2 + movdqa xmm10,xmm3 + psrlq xmm3,2 + pxor xmm4,xmm6 + pxor xmm3,xmm5 + pand xmm4,xmm8 + pand xmm3,xmm8 + pxor xmm6,xmm4 + psllq xmm4,2 + pxor xmm5,xmm3 + psllq xmm3,2 + pxor xmm4,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,2 + movdqa xmm10,xmm15 + psrlq xmm15,2 + pxor xmm0,xmm2 + pxor xmm15,xmm1 + pand xmm0,xmm8 + pand xmm15,xmm8 + pxor xmm2,xmm0 + psllq xmm0,2 + pxor xmm1,xmm15 + psllq xmm15,2 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm9,xmm2 + psrlq xmm2,4 + movdqa xmm10,xmm1 + psrlq xmm1,4 + pxor xmm2,xmm6 + pxor xmm1,xmm5 + pand xmm2,xmm7 + pand xmm1,xmm7 + pxor xmm6,xmm2 + psllq xmm2,4 + pxor xmm5,xmm1 + psllq xmm1,4 + pxor xmm2,xmm9 + pxor xmm1,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,4 + movdqa xmm10,xmm15 + psrlq xmm15,4 + pxor xmm0,xmm4 + pxor xmm15,xmm3 + pand xmm0,xmm7 + pand xmm15,xmm7 + pxor xmm4,xmm0 + psllq xmm0,4 + pxor xmm3,xmm15 + psllq xmm15,4 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + dec r10d + jmp $L$dec_sbox +ALIGN 16 +$L$dec_loop:: + pxor xmm15,XMMWORD PTR[rax] + pxor xmm0,XMMWORD PTR[16+rax] + pxor xmm1,XMMWORD PTR[32+rax] + pxor xmm2,XMMWORD PTR[48+rax] +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 + pxor xmm3,XMMWORD PTR[64+rax] + pxor xmm4,XMMWORD PTR[80+rax] +DB 102,15,56,0,207 +DB 102,15,56,0,215 + pxor xmm5,XMMWORD PTR[96+rax] + pxor xmm6,XMMWORD PTR[112+rax] +DB 102,15,56,0,223 +DB 102,15,56,0,231 +DB 102,15,56,0,239 +DB 102,15,56,0,247 + lea rax,QWORD PTR[128+rax] +$L$dec_sbox:: + pxor xmm2,xmm3 + + pxor xmm3,xmm6 + pxor 
xmm1,xmm6 + pxor xmm5,xmm3 + pxor xmm6,xmm5 + pxor xmm0,xmm6 + + pxor xmm15,xmm0 + pxor xmm1,xmm4 + pxor xmm2,xmm15 + pxor xmm4,xmm15 + pxor xmm0,xmm2 + movdqa xmm10,xmm2 + movdqa xmm9,xmm6 + movdqa xmm8,xmm0 + movdqa xmm12,xmm3 + movdqa xmm11,xmm4 + + pxor xmm10,xmm15 + pxor xmm9,xmm3 + pxor xmm8,xmm5 + movdqa xmm13,xmm10 + pxor xmm12,xmm15 + movdqa xmm7,xmm9 + pxor xmm11,xmm1 + movdqa xmm14,xmm10 + + por xmm9,xmm8 + por xmm10,xmm11 + pxor xmm14,xmm7 + pand xmm13,xmm11 + pxor xmm11,xmm8 + pand xmm7,xmm8 + pand xmm14,xmm11 + movdqa xmm11,xmm5 + pxor xmm11,xmm1 + pand xmm12,xmm11 + pxor xmm10,xmm12 + pxor xmm9,xmm12 + movdqa xmm12,xmm2 + movdqa xmm11,xmm0 + pxor xmm12,xmm6 + pxor xmm11,xmm4 + movdqa xmm8,xmm12 + pand xmm12,xmm11 + por xmm8,xmm11 + pxor xmm7,xmm12 + pxor xmm10,xmm14 + pxor xmm9,xmm13 + pxor xmm8,xmm14 + movdqa xmm11,xmm3 + pxor xmm7,xmm13 + movdqa xmm12,xmm15 + pxor xmm8,xmm13 + movdqa xmm13,xmm6 + pand xmm11,xmm5 + movdqa xmm14,xmm2 + pand xmm12,xmm1 + pand xmm13,xmm0 + por xmm14,xmm4 + pxor xmm10,xmm11 + pxor xmm9,xmm12 + pxor xmm8,xmm13 + pxor xmm7,xmm14 + + + + + + movdqa xmm11,xmm10 + pand xmm10,xmm8 + pxor xmm11,xmm9 + + movdqa xmm13,xmm7 + movdqa xmm14,xmm11 + pxor xmm13,xmm10 + pand xmm14,xmm13 + + movdqa xmm12,xmm8 + pxor xmm14,xmm9 + pxor xmm12,xmm7 + + pxor xmm10,xmm9 + + pand xmm12,xmm10 + + movdqa xmm9,xmm13 + pxor xmm12,xmm7 + + pxor xmm9,xmm12 + pxor xmm8,xmm12 + + pand xmm9,xmm7 + + pxor xmm13,xmm9 + pxor xmm8,xmm9 + + pand xmm13,xmm14 + + pxor xmm13,xmm11 + movdqa xmm11,xmm4 + movdqa xmm7,xmm0 + movdqa xmm9,xmm14 + pxor xmm9,xmm13 + pand xmm9,xmm4 + pxor xmm4,xmm0 + pand xmm0,xmm14 + pand xmm4,xmm13 + pxor xmm4,xmm0 + pxor xmm0,xmm9 + pxor xmm11,xmm1 + pxor xmm7,xmm5 + pxor xmm14,xmm12 + pxor xmm13,xmm8 + movdqa xmm10,xmm14 + movdqa xmm9,xmm12 + pxor xmm10,xmm13 + pxor xmm9,xmm8 + pand xmm10,xmm11 + pand xmm9,xmm1 + pxor xmm11,xmm7 + pxor xmm1,xmm5 + pand xmm7,xmm14 + pand xmm5,xmm12 + pand xmm11,xmm13 + pand xmm1,xmm8 + pxor 
xmm7,xmm11 + pxor xmm1,xmm5 + pxor xmm11,xmm10 + pxor xmm5,xmm9 + pxor xmm4,xmm11 + pxor xmm1,xmm11 + pxor xmm0,xmm7 + pxor xmm5,xmm7 + + movdqa xmm11,xmm2 + movdqa xmm7,xmm6 + pxor xmm11,xmm15 + pxor xmm7,xmm3 + movdqa xmm10,xmm14 + movdqa xmm9,xmm12 + pxor xmm10,xmm13 + pxor xmm9,xmm8 + pand xmm10,xmm11 + pand xmm9,xmm15 + pxor xmm11,xmm7 + pxor xmm15,xmm3 + pand xmm7,xmm14 + pand xmm3,xmm12 + pand xmm11,xmm13 + pand xmm15,xmm8 + pxor xmm7,xmm11 + pxor xmm15,xmm3 + pxor xmm11,xmm10 + pxor xmm3,xmm9 + pxor xmm14,xmm12 + pxor xmm13,xmm8 + movdqa xmm10,xmm14 + pxor xmm10,xmm13 + pand xmm10,xmm2 + pxor xmm2,xmm6 + pand xmm6,xmm14 + pand xmm2,xmm13 + pxor xmm2,xmm6 + pxor xmm6,xmm10 + pxor xmm2,xmm11 + pxor xmm15,xmm11 + pxor xmm6,xmm7 + pxor xmm3,xmm7 + pxor xmm0,xmm6 + pxor xmm5,xmm4 + + pxor xmm3,xmm0 + pxor xmm1,xmm6 + pxor xmm4,xmm6 + pxor xmm3,xmm1 + pxor xmm6,xmm15 + pxor xmm3,xmm4 + pxor xmm2,xmm5 + pxor xmm5,xmm0 + pxor xmm2,xmm3 + + pxor xmm3,xmm15 + pxor xmm6,xmm2 + dec r10d + jl $L$dec_done + + pshufd xmm7,xmm15,04Eh + pshufd xmm13,xmm2,04Eh + pxor xmm7,xmm15 + pshufd xmm14,xmm4,04Eh + pxor xmm13,xmm2 + pshufd xmm8,xmm0,04Eh + pxor xmm14,xmm4 + pshufd xmm9,xmm5,04Eh + pxor xmm8,xmm0 + pshufd xmm10,xmm3,04Eh + pxor xmm9,xmm5 + pxor xmm15,xmm13 + pxor xmm0,xmm13 + pshufd xmm11,xmm1,04Eh + pxor xmm10,xmm3 + pxor xmm5,xmm7 + pxor xmm3,xmm8 + pshufd xmm12,xmm6,04Eh + pxor xmm11,xmm1 + pxor xmm0,xmm14 + pxor xmm1,xmm9 + pxor xmm12,xmm6 + + pxor xmm5,xmm14 + pxor xmm3,xmm13 + pxor xmm1,xmm13 + pxor xmm6,xmm10 + pxor xmm2,xmm11 + pxor xmm1,xmm14 + pxor xmm6,xmm14 + pxor xmm4,xmm12 + pshufd xmm7,xmm15,093h + pshufd xmm8,xmm0,093h + pxor xmm15,xmm7 + pshufd xmm9,xmm5,093h + pxor xmm0,xmm8 + pshufd xmm10,xmm3,093h + pxor xmm5,xmm9 + pshufd xmm11,xmm1,093h + pxor xmm3,xmm10 + pshufd xmm12,xmm6,093h + pxor xmm1,xmm11 + pshufd xmm13,xmm2,093h + pxor xmm6,xmm12 + pshufd xmm14,xmm4,093h + pxor xmm2,xmm13 + pxor xmm4,xmm14 + + pxor xmm8,xmm15 + pxor xmm7,xmm4 + pxor 
xmm8,xmm4 + pshufd xmm15,xmm15,04Eh + pxor xmm9,xmm0 + pshufd xmm0,xmm0,04Eh + pxor xmm12,xmm1 + pxor xmm15,xmm7 + pxor xmm13,xmm6 + pxor xmm0,xmm8 + pxor xmm11,xmm3 + pshufd xmm7,xmm1,04Eh + pxor xmm14,xmm2 + pshufd xmm8,xmm6,04Eh + pxor xmm10,xmm5 + pshufd xmm1,xmm3,04Eh + pxor xmm10,xmm4 + pshufd xmm6,xmm4,04Eh + pxor xmm11,xmm4 + pshufd xmm3,xmm2,04Eh + pxor xmm7,xmm11 + pshufd xmm2,xmm5,04Eh + pxor xmm8,xmm12 + pxor xmm10,xmm1 + pxor xmm6,xmm14 + pxor xmm13,xmm3 + movdqa xmm3,xmm7 + pxor xmm2,xmm9 + movdqa xmm5,xmm13 + movdqa xmm4,xmm8 + movdqa xmm1,xmm2 + movdqa xmm2,xmm10 + movdqa xmm7,XMMWORD PTR[((-16))+r11] + jnz $L$dec_loop + movdqa xmm7,XMMWORD PTR[((-32))+r11] + jmp $L$dec_loop +ALIGN 16 +$L$dec_done:: + movdqa xmm7,XMMWORD PTR[r11] + movdqa xmm8,XMMWORD PTR[16+r11] + movdqa xmm9,xmm2 + psrlq xmm2,1 + movdqa xmm10,xmm1 + psrlq xmm1,1 + pxor xmm2,xmm4 + pxor xmm1,xmm6 + pand xmm2,xmm7 + pand xmm1,xmm7 + pxor xmm4,xmm2 + psllq xmm2,1 + pxor xmm6,xmm1 + psllq xmm1,1 + pxor xmm2,xmm9 + pxor xmm1,xmm10 + movdqa xmm9,xmm5 + psrlq xmm5,1 + movdqa xmm10,xmm15 + psrlq xmm15,1 + pxor xmm5,xmm3 + pxor xmm15,xmm0 + pand xmm5,xmm7 + pand xmm15,xmm7 + pxor xmm3,xmm5 + psllq xmm5,1 + pxor xmm0,xmm15 + psllq xmm15,1 + pxor xmm5,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[32+r11] + movdqa xmm9,xmm6 + psrlq xmm6,2 + movdqa xmm10,xmm1 + psrlq xmm1,2 + pxor xmm6,xmm4 + pxor xmm1,xmm2 + pand xmm6,xmm8 + pand xmm1,xmm8 + pxor xmm4,xmm6 + psllq xmm6,2 + pxor xmm2,xmm1 + psllq xmm1,2 + pxor xmm6,xmm9 + pxor xmm1,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,2 + movdqa xmm10,xmm15 + psrlq xmm15,2 + pxor xmm0,xmm3 + pxor xmm15,xmm5 + pand xmm0,xmm8 + pand xmm15,xmm8 + pxor xmm3,xmm0 + psllq xmm0,2 + pxor xmm5,xmm15 + psllq xmm15,2 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm9,xmm3 + psrlq xmm3,4 + movdqa xmm10,xmm5 + psrlq xmm5,4 + pxor xmm3,xmm4 + pxor xmm5,xmm2 + pand xmm3,xmm7 + pand xmm5,xmm7 + pxor xmm4,xmm3 + psllq xmm3,4 + pxor xmm2,xmm5 + psllq xmm5,4 + pxor xmm3,xmm9 
+ pxor xmm5,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,4 + movdqa xmm10,xmm15 + psrlq xmm15,4 + pxor xmm0,xmm6 + pxor xmm15,xmm1 + pand xmm0,xmm7 + pand xmm15,xmm7 + pxor xmm6,xmm0 + psllq xmm0,4 + pxor xmm1,xmm15 + psllq xmm15,4 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[rax] + pxor xmm5,xmm7 + pxor xmm3,xmm7 + pxor xmm1,xmm7 + pxor xmm6,xmm7 + pxor xmm2,xmm7 + pxor xmm4,xmm7 + pxor xmm15,xmm7 + pxor xmm0,xmm7 + DB 0F3h,0C3h ;repret +_bsaes_decrypt8 ENDP + +ALIGN 16 +_bsaes_key_convert PROC PRIVATE + lea r11,QWORD PTR[$L$masks] + movdqu xmm7,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + movdqa xmm0,XMMWORD PTR[r11] + movdqa xmm1,XMMWORD PTR[16+r11] + movdqa xmm2,XMMWORD PTR[32+r11] + movdqa xmm3,XMMWORD PTR[48+r11] + movdqa xmm4,XMMWORD PTR[64+r11] + pcmpeqd xmm5,xmm5 + + movdqu xmm6,XMMWORD PTR[rcx] + movdqa XMMWORD PTR[rax],xmm7 + lea rax,QWORD PTR[16+rax] + dec r10d + jmp $L$key_loop +ALIGN 16 +$L$key_loop:: +DB 102,15,56,0,244 + + movdqa xmm8,xmm0 + movdqa xmm9,xmm1 + + pand xmm8,xmm6 + pand xmm9,xmm6 + movdqa xmm10,xmm2 + pcmpeqb xmm8,xmm0 + psllq xmm0,4 + movdqa xmm11,xmm3 + pcmpeqb xmm9,xmm1 + psllq xmm1,4 + + pand xmm10,xmm6 + pand xmm11,xmm6 + movdqa xmm12,xmm0 + pcmpeqb xmm10,xmm2 + psllq xmm2,4 + movdqa xmm13,xmm1 + pcmpeqb xmm11,xmm3 + psllq xmm3,4 + + movdqa xmm14,xmm2 + movdqa xmm15,xmm3 + pxor xmm8,xmm5 + pxor xmm9,xmm5 + + pand xmm12,xmm6 + pand xmm13,xmm6 + movdqa XMMWORD PTR[rax],xmm8 + pcmpeqb xmm12,xmm0 + psrlq xmm0,4 + movdqa XMMWORD PTR[16+rax],xmm9 + pcmpeqb xmm13,xmm1 + psrlq xmm1,4 + lea rcx,QWORD PTR[16+rcx] + + pand xmm14,xmm6 + pand xmm15,xmm6 + movdqa XMMWORD PTR[32+rax],xmm10 + pcmpeqb xmm14,xmm2 + psrlq xmm2,4 + movdqa XMMWORD PTR[48+rax],xmm11 + pcmpeqb xmm15,xmm3 + psrlq xmm3,4 + movdqu xmm6,XMMWORD PTR[rcx] + + pxor xmm13,xmm5 + pxor xmm14,xmm5 + movdqa XMMWORD PTR[64+rax],xmm12 + movdqa XMMWORD PTR[80+rax],xmm13 + movdqa XMMWORD PTR[96+rax],xmm14 + movdqa XMMWORD PTR[112+rax],xmm15 + lea rax,QWORD PTR[128+rax] + 
dec r10d + jnz $L$key_loop + + movdqa xmm7,XMMWORD PTR[80+r11] + + DB 0F3h,0C3h ;repret +_bsaes_key_convert ENDP +EXTERN asm_AES_cbc_encrypt:NEAR +PUBLIC bsaes_cbc_encrypt + +ALIGN 16 +bsaes_cbc_encrypt PROC PUBLIC + mov r11d,DWORD PTR[48+rsp] + cmp r11d,0 + jne asm_AES_cbc_encrypt + cmp r8,128 + jb asm_AES_cbc_encrypt + + mov rax,rsp +$L$cbc_dec_prologue:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-72))+rsp] + mov r10,QWORD PTR[160+rsp] + lea rsp,QWORD PTR[((-160))+rsp] + movaps XMMWORD PTR[64+rsp],xmm6 + movaps XMMWORD PTR[80+rsp],xmm7 + movaps XMMWORD PTR[96+rsp],xmm8 + movaps XMMWORD PTR[112+rsp],xmm9 + movaps XMMWORD PTR[128+rsp],xmm10 + movaps XMMWORD PTR[144+rsp],xmm11 + movaps XMMWORD PTR[160+rsp],xmm12 + movaps XMMWORD PTR[176+rsp],xmm13 + movaps XMMWORD PTR[192+rsp],xmm14 + movaps XMMWORD PTR[208+rsp],xmm15 +$L$cbc_dec_body:: + mov rbp,rsp + mov eax,DWORD PTR[240+r9] + mov r12,rcx + mov r13,rdx + mov r14,r8 + mov r15,r9 + mov rbx,r10 + shr r14,4 + + mov edx,eax + shl rax,7 + sub rax,96 + sub rsp,rax + + mov rax,rsp + mov rcx,r15 + mov r10d,edx + call _bsaes_key_convert + pxor xmm7,XMMWORD PTR[rsp] + movdqa XMMWORD PTR[rax],xmm6 + movdqa XMMWORD PTR[rsp],xmm7 + + movdqu xmm14,XMMWORD PTR[rbx] + sub r14,8 +$L$cbc_dec_loop:: + movdqu xmm15,XMMWORD PTR[r12] + movdqu xmm0,XMMWORD PTR[16+r12] + movdqu xmm1,XMMWORD PTR[32+r12] + movdqu xmm2,XMMWORD PTR[48+r12] + movdqu xmm3,XMMWORD PTR[64+r12] + movdqu xmm4,XMMWORD PTR[80+r12] + mov rax,rsp + movdqu xmm5,XMMWORD PTR[96+r12] + mov r10d,edx + movdqu xmm6,XMMWORD PTR[112+r12] + movdqa XMMWORD PTR[32+rbp],xmm14 + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm3,xmm9 + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm1,xmm10 + movdqu xmm12,XMMWORD PTR[80+r12] + pxor xmm6,xmm11 + movdqu 
xmm13,XMMWORD PTR[96+r12] + pxor xmm2,xmm12 + movdqu xmm14,XMMWORD PTR[112+r12] + pxor xmm4,xmm13 + movdqu XMMWORD PTR[r13],xmm15 + lea r12,QWORD PTR[128+r12] + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + movdqu XMMWORD PTR[64+r13],xmm1 + movdqu XMMWORD PTR[80+r13],xmm6 + movdqu XMMWORD PTR[96+r13],xmm2 + movdqu XMMWORD PTR[112+r13],xmm4 + lea r13,QWORD PTR[128+r13] + sub r14,8 + jnc $L$cbc_dec_loop + + add r14,8 + jz $L$cbc_dec_done + + movdqu xmm15,XMMWORD PTR[r12] + mov rax,rsp + mov r10d,edx + cmp r14,2 + jb $L$cbc_dec_one + movdqu xmm0,XMMWORD PTR[16+r12] + je $L$cbc_dec_two + movdqu xmm1,XMMWORD PTR[32+r12] + cmp r14,4 + jb $L$cbc_dec_three + movdqu xmm2,XMMWORD PTR[48+r12] + je $L$cbc_dec_four + movdqu xmm3,XMMWORD PTR[64+r12] + cmp r14,6 + jb $L$cbc_dec_five + movdqu xmm4,XMMWORD PTR[80+r12] + je $L$cbc_dec_six + movdqu xmm5,XMMWORD PTR[96+r12] + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm3,xmm9 + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm1,xmm10 + movdqu xmm12,XMMWORD PTR[80+r12] + pxor xmm6,xmm11 + movdqu xmm14,XMMWORD PTR[96+r12] + pxor xmm2,xmm12 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + movdqu XMMWORD PTR[64+r13],xmm1 + movdqu XMMWORD PTR[80+r13],xmm6 + movdqu XMMWORD PTR[96+r13],xmm2 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_six:: + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm3,xmm9 + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm1,xmm10 + movdqu xmm14,XMMWORD 
PTR[80+r12] + pxor xmm6,xmm11 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + movdqu XMMWORD PTR[64+r13],xmm1 + movdqu XMMWORD PTR[80+r13],xmm6 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_five:: + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm3,xmm9 + movdqu xmm14,XMMWORD PTR[64+r12] + pxor xmm1,xmm10 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + movdqu XMMWORD PTR[64+r13],xmm1 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_four:: + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu xmm14,XMMWORD PTR[48+r12] + pxor xmm3,xmm9 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_three:: + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm14,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_two:: + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm14,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_one:: + lea rcx,QWORD PTR[r12] + lea rdx,QWORD 
PTR[32+rbp] + lea r8,QWORD PTR[r15] + call asm_AES_decrypt + pxor xmm14,XMMWORD PTR[32+rbp] + movdqu XMMWORD PTR[r13],xmm14 + movdqa xmm14,xmm15 + +$L$cbc_dec_done:: + movdqu XMMWORD PTR[rbx],xmm14 + lea rax,QWORD PTR[rsp] + pxor xmm0,xmm0 +$L$cbc_dec_bzero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + lea rax,QWORD PTR[32+rax] + cmp rbp,rax + ja $L$cbc_dec_bzero + + lea rsp,QWORD PTR[rbp] + movaps xmm6,XMMWORD PTR[64+rbp] + movaps xmm7,XMMWORD PTR[80+rbp] + movaps xmm8,XMMWORD PTR[96+rbp] + movaps xmm9,XMMWORD PTR[112+rbp] + movaps xmm10,XMMWORD PTR[128+rbp] + movaps xmm11,XMMWORD PTR[144+rbp] + movaps xmm12,XMMWORD PTR[160+rbp] + movaps xmm13,XMMWORD PTR[176+rbp] + movaps xmm14,XMMWORD PTR[192+rbp] + movaps xmm15,XMMWORD PTR[208+rbp] + lea rsp,QWORD PTR[160+rbp] + mov r15,QWORD PTR[72+rsp] + mov r14,QWORD PTR[80+rsp] + mov r13,QWORD PTR[88+rsp] + mov r12,QWORD PTR[96+rsp] + mov rbx,QWORD PTR[104+rsp] + mov rax,QWORD PTR[112+rsp] + lea rsp,QWORD PTR[120+rsp] + mov rbp,rax +$L$cbc_dec_epilogue:: + DB 0F3h,0C3h ;repret +bsaes_cbc_encrypt ENDP + +PUBLIC bsaes_ctr32_encrypt_blocks + +ALIGN 16 +bsaes_ctr32_encrypt_blocks PROC PUBLIC + mov rax,rsp +$L$ctr_enc_prologue:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-72))+rsp] + mov r10,QWORD PTR[160+rsp] + lea rsp,QWORD PTR[((-160))+rsp] + movaps XMMWORD PTR[64+rsp],xmm6 + movaps XMMWORD PTR[80+rsp],xmm7 + movaps XMMWORD PTR[96+rsp],xmm8 + movaps XMMWORD PTR[112+rsp],xmm9 + movaps XMMWORD PTR[128+rsp],xmm10 + movaps XMMWORD PTR[144+rsp],xmm11 + movaps XMMWORD PTR[160+rsp],xmm12 + movaps XMMWORD PTR[176+rsp],xmm13 + movaps XMMWORD PTR[192+rsp],xmm14 + movaps XMMWORD PTR[208+rsp],xmm15 +$L$ctr_enc_body:: + mov rbp,rsp + movdqu xmm0,XMMWORD PTR[r10] + mov eax,DWORD PTR[240+r9] + mov r12,rcx + mov r13,rdx + mov r14,r8 + mov r15,r9 + movdqa XMMWORD PTR[32+rbp],xmm0 + cmp r8,8 + jb $L$ctr_enc_short + + mov ebx,eax + shl rax,7 + sub rax,96 + sub rsp,rax + + mov 
rax,rsp + mov rcx,r15 + mov r10d,ebx + call _bsaes_key_convert + pxor xmm7,xmm6 + movdqa XMMWORD PTR[rax],xmm7 + + movdqa xmm8,XMMWORD PTR[rsp] + lea r11,QWORD PTR[$L$ADD1] + movdqa xmm15,XMMWORD PTR[32+rbp] + movdqa xmm7,XMMWORD PTR[((-32))+r11] +DB 102,68,15,56,0,199 +DB 102,68,15,56,0,255 + movdqa XMMWORD PTR[rsp],xmm8 + jmp $L$ctr_enc_loop +ALIGN 16 +$L$ctr_enc_loop:: + movdqa XMMWORD PTR[32+rbp],xmm15 + movdqa xmm0,xmm15 + movdqa xmm1,xmm15 + paddd xmm0,XMMWORD PTR[r11] + movdqa xmm2,xmm15 + paddd xmm1,XMMWORD PTR[16+r11] + movdqa xmm3,xmm15 + paddd xmm2,XMMWORD PTR[32+r11] + movdqa xmm4,xmm15 + paddd xmm3,XMMWORD PTR[48+r11] + movdqa xmm5,xmm15 + paddd xmm4,XMMWORD PTR[64+r11] + movdqa xmm6,xmm15 + paddd xmm5,XMMWORD PTR[80+r11] + paddd xmm6,XMMWORD PTR[96+r11] + + + + movdqa xmm8,XMMWORD PTR[rsp] + lea rax,QWORD PTR[16+rsp] + movdqa xmm7,XMMWORD PTR[((-16))+r11] + pxor xmm15,xmm8 + pxor xmm0,xmm8 + pxor xmm1,xmm8 + pxor xmm2,xmm8 +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 + pxor xmm3,xmm8 + pxor xmm4,xmm8 +DB 102,15,56,0,207 +DB 102,15,56,0,215 + pxor xmm5,xmm8 + pxor xmm6,xmm8 +DB 102,15,56,0,223 +DB 102,15,56,0,231 +DB 102,15,56,0,239 +DB 102,15,56,0,247 + lea r11,QWORD PTR[$L$BS0] + mov r10d,ebx + + call _bsaes_encrypt8_bitslice + + sub r14,8 + jc $L$ctr_enc_loop_done + + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + movdqu xmm9,XMMWORD PTR[32+r12] + movdqu xmm10,XMMWORD PTR[48+r12] + movdqu xmm11,XMMWORD PTR[64+r12] + movdqu xmm12,XMMWORD PTR[80+r12] + movdqu xmm13,XMMWORD PTR[96+r12] + movdqu xmm14,XMMWORD PTR[112+r12] + lea r12,QWORD PTR[128+r12] + pxor xmm7,xmm15 + movdqa xmm15,XMMWORD PTR[32+rbp] + pxor xmm0,xmm8 + movdqu XMMWORD PTR[r13],xmm7 + pxor xmm3,xmm9 + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,xmm10 + movdqu XMMWORD PTR[32+r13],xmm3 + pxor xmm2,xmm11 + movdqu XMMWORD PTR[48+r13],xmm5 + pxor xmm6,xmm12 + movdqu XMMWORD PTR[64+r13],xmm2 + pxor xmm1,xmm13 + movdqu XMMWORD PTR[80+r13],xmm6 + pxor xmm4,xmm14 + movdqu XMMWORD 
PTR[96+r13],xmm1 + lea r11,QWORD PTR[$L$ADD1] + movdqu XMMWORD PTR[112+r13],xmm4 + lea r13,QWORD PTR[128+r13] + paddd xmm15,XMMWORD PTR[112+r11] + jnz $L$ctr_enc_loop + + jmp $L$ctr_enc_done +ALIGN 16 +$L$ctr_enc_loop_done:: + add r14,8 + movdqu xmm7,XMMWORD PTR[r12] + pxor xmm15,xmm7 + movdqu XMMWORD PTR[r13],xmm15 + cmp r14,2 + jb $L$ctr_enc_done + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm8 + movdqu XMMWORD PTR[16+r13],xmm0 + je $L$ctr_enc_done + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm3,xmm9 + movdqu XMMWORD PTR[32+r13],xmm3 + cmp r14,4 + jb $L$ctr_enc_done + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm5,xmm10 + movdqu XMMWORD PTR[48+r13],xmm5 + je $L$ctr_enc_done + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm2,xmm11 + movdqu XMMWORD PTR[64+r13],xmm2 + cmp r14,6 + jb $L$ctr_enc_done + movdqu xmm12,XMMWORD PTR[80+r12] + pxor xmm6,xmm12 + movdqu XMMWORD PTR[80+r13],xmm6 + je $L$ctr_enc_done + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm1,xmm13 + movdqu XMMWORD PTR[96+r13],xmm1 + jmp $L$ctr_enc_done + +ALIGN 16 +$L$ctr_enc_short:: + lea rcx,QWORD PTR[32+rbp] + lea rdx,QWORD PTR[48+rbp] + lea r8,QWORD PTR[r15] + call asm_AES_encrypt + movdqu xmm0,XMMWORD PTR[r12] + lea r12,QWORD PTR[16+r12] + mov eax,DWORD PTR[44+rbp] + bswap eax + pxor xmm0,XMMWORD PTR[48+rbp] + inc eax + movdqu XMMWORD PTR[r13],xmm0 + bswap eax + lea r13,QWORD PTR[16+r13] + mov DWORD PTR[44+rsp],eax + dec r14 + jnz $L$ctr_enc_short + +$L$ctr_enc_done:: + lea rax,QWORD PTR[rsp] + pxor xmm0,xmm0 +$L$ctr_enc_bzero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + lea rax,QWORD PTR[32+rax] + cmp rbp,rax + ja $L$ctr_enc_bzero + + lea rsp,QWORD PTR[rbp] + movaps xmm6,XMMWORD PTR[64+rbp] + movaps xmm7,XMMWORD PTR[80+rbp] + movaps xmm8,XMMWORD PTR[96+rbp] + movaps xmm9,XMMWORD PTR[112+rbp] + movaps xmm10,XMMWORD PTR[128+rbp] + movaps xmm11,XMMWORD PTR[144+rbp] + movaps xmm12,XMMWORD PTR[160+rbp] + movaps xmm13,XMMWORD PTR[176+rbp] + movaps xmm14,XMMWORD PTR[192+rbp] + movaps 
xmm15,XMMWORD PTR[208+rbp] + lea rsp,QWORD PTR[160+rbp] + mov r15,QWORD PTR[72+rsp] + mov r14,QWORD PTR[80+rsp] + mov r13,QWORD PTR[88+rsp] + mov r12,QWORD PTR[96+rsp] + mov rbx,QWORD PTR[104+rsp] + mov rax,QWORD PTR[112+rsp] + lea rsp,QWORD PTR[120+rsp] + mov rbp,rax +$L$ctr_enc_epilogue:: + DB 0F3h,0C3h ;repret +bsaes_ctr32_encrypt_blocks ENDP +PUBLIC bsaes_xts_encrypt + +ALIGN 16 +bsaes_xts_encrypt PROC PUBLIC + mov rax,rsp +$L$xts_enc_prologue:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-72))+rsp] + mov r10,QWORD PTR[160+rsp] + mov r11,QWORD PTR[168+rsp] + lea rsp,QWORD PTR[((-160))+rsp] + movaps XMMWORD PTR[64+rsp],xmm6 + movaps XMMWORD PTR[80+rsp],xmm7 + movaps XMMWORD PTR[96+rsp],xmm8 + movaps XMMWORD PTR[112+rsp],xmm9 + movaps XMMWORD PTR[128+rsp],xmm10 + movaps XMMWORD PTR[144+rsp],xmm11 + movaps XMMWORD PTR[160+rsp],xmm12 + movaps XMMWORD PTR[176+rsp],xmm13 + movaps XMMWORD PTR[192+rsp],xmm14 + movaps XMMWORD PTR[208+rsp],xmm15 +$L$xts_enc_body:: + mov rbp,rsp + mov r12,rcx + mov r13,rdx + mov r14,r8 + mov r15,r9 + + lea rcx,QWORD PTR[r11] + lea rdx,QWORD PTR[32+rbp] + lea r8,QWORD PTR[r10] + call asm_AES_encrypt + + mov eax,DWORD PTR[240+r15] + mov rbx,r14 + + mov edx,eax + shl rax,7 + sub rax,96 + sub rsp,rax + + mov rax,rsp + mov rcx,r15 + mov r10d,edx + call _bsaes_key_convert + pxor xmm7,xmm6 + movdqa XMMWORD PTR[rax],xmm7 + + and r14,-16 + sub rsp,080h + movdqa xmm6,XMMWORD PTR[32+rbp] + + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD PTR[$L$xts_magic] + pcmpgtd xmm14,xmm6 + + sub r14,080h + jc $L$xts_enc_short + jmp $L$xts_enc_loop + +ALIGN 16 +$L$xts_enc_loop:: + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm15,xmm6 + movdqa XMMWORD PTR[rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm0,xmm6 + movdqa XMMWORD PTR[16+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + 
movdqu xmm7,XMMWORD PTR[r12] + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm1,xmm6 + movdqa XMMWORD PTR[32+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm15,xmm7 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm2,xmm6 + movdqa XMMWORD PTR[48+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm0,xmm8 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm3,xmm6 + movdqa XMMWORD PTR[64+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm1,xmm9 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm4,xmm6 + movdqa XMMWORD PTR[80+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm2,xmm10 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm5,xmm6 + movdqa XMMWORD PTR[96+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm12,XMMWORD PTR[80+r12] + pxor xmm3,xmm11 + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm4,xmm12 + movdqu xmm14,XMMWORD PTR[112+r12] + lea r12,QWORD PTR[128+r12] + movdqa XMMWORD PTR[112+rsp],xmm6 + pxor xmm5,xmm13 + lea rax,QWORD PTR[128+rsp] + pxor xmm6,xmm14 + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm3 + pxor xmm2,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm5 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm2 + pxor xmm1,XMMWORD PTR[96+rsp] + movdqu XMMWORD PTR[80+r13],xmm6 + pxor xmm4,XMMWORD PTR[112+rsp] + movdqu XMMWORD PTR[96+r13],xmm1 + movdqu XMMWORD PTR[112+r13],xmm4 + lea r13,QWORD PTR[128+r13] + + movdqa xmm6,XMMWORD 
PTR[112+rsp] + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD PTR[$L$xts_magic] + pcmpgtd xmm14,xmm6 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + + sub r14,080h + jnc $L$xts_enc_loop + +$L$xts_enc_short:: + add r14,080h + jz $L$xts_enc_done + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm15,xmm6 + movdqa XMMWORD PTR[rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm0,xmm6 + movdqa XMMWORD PTR[16+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm7,XMMWORD PTR[r12] + cmp r14,16 + je $L$xts_enc_1 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm1,xmm6 + movdqa XMMWORD PTR[32+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm8,XMMWORD PTR[16+r12] + cmp r14,32 + je $L$xts_enc_2 + pxor xmm15,xmm7 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm2,xmm6 + movdqa XMMWORD PTR[48+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm9,XMMWORD PTR[32+r12] + cmp r14,48 + je $L$xts_enc_3 + pxor xmm0,xmm8 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm3,xmm6 + movdqa XMMWORD PTR[64+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm10,XMMWORD PTR[48+r12] + cmp r14,64 + je $L$xts_enc_4 + pxor xmm1,xmm9 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm4,xmm6 + movdqa XMMWORD PTR[80+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm11,XMMWORD PTR[64+r12] + cmp r14,80 + je $L$xts_enc_5 + pxor xmm2,xmm10 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm5,xmm6 + movdqa XMMWORD PTR[96+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm12,XMMWORD PTR[80+r12] + cmp r14,96 + je $L$xts_enc_6 + pxor 
xmm3,xmm11 + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm4,xmm12 + movdqa XMMWORD PTR[112+rsp],xmm6 + lea r12,QWORD PTR[112+r12] + pxor xmm5,xmm13 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm3 + pxor xmm2,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm5 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm2 + pxor xmm1,XMMWORD PTR[96+rsp] + movdqu XMMWORD PTR[80+r13],xmm6 + movdqu XMMWORD PTR[96+r13],xmm1 + lea r13,QWORD PTR[112+r13] + + movdqa xmm6,XMMWORD PTR[112+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_6:: + pxor xmm3,xmm11 + lea r12,QWORD PTR[96+r12] + pxor xmm4,xmm12 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm3 + pxor xmm2,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm5 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm2 + movdqu XMMWORD PTR[80+r13],xmm6 + lea r13,QWORD PTR[96+r13] + + movdqa xmm6,XMMWORD PTR[96+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_5:: + pxor xmm2,xmm10 + lea r12,QWORD PTR[80+r12] + pxor xmm3,xmm11 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm3 + pxor xmm2,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm5 + movdqu XMMWORD PTR[64+r13],xmm2 + lea r13,QWORD PTR[80+r13] + + movdqa xmm6,XMMWORD PTR[80+rsp] + jmp $L$xts_enc_done +ALIGN 16 
+$L$xts_enc_4:: + pxor xmm1,xmm9 + lea r12,QWORD PTR[64+r12] + pxor xmm2,xmm10 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm3 + movdqu XMMWORD PTR[48+r13],xmm5 + lea r13,QWORD PTR[64+r13] + + movdqa xmm6,XMMWORD PTR[64+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_3:: + pxor xmm0,xmm8 + lea r12,QWORD PTR[48+r12] + pxor xmm1,xmm9 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm3 + lea r13,QWORD PTR[48+r13] + + movdqa xmm6,XMMWORD PTR[48+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_2:: + pxor xmm15,xmm7 + lea r12,QWORD PTR[32+r12] + pxor xmm0,xmm8 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + lea r13,QWORD PTR[32+r13] + + movdqa xmm6,XMMWORD PTR[32+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_1:: + pxor xmm7,xmm15 + lea r12,QWORD PTR[16+r12] + movdqa XMMWORD PTR[32+rbp],xmm7 + lea rcx,QWORD PTR[32+rbp] + lea rdx,QWORD PTR[32+rbp] + lea r8,QWORD PTR[r15] + call asm_AES_encrypt + pxor xmm15,XMMWORD PTR[32+rbp] + + + + + + movdqu XMMWORD PTR[r13],xmm15 + lea r13,QWORD PTR[16+r13] + + movdqa xmm6,XMMWORD PTR[16+rsp] + +$L$xts_enc_done:: + and ebx,15 + jz $L$xts_enc_ret + mov rdx,r13 + +$L$xts_enc_steal:: + movzx eax,BYTE PTR[r12] + movzx ecx,BYTE PTR[((-16))+rdx] + lea r12,QWORD PTR[1+r12] + mov BYTE PTR[((-16))+rdx],al + mov BYTE PTR[rdx],cl + lea rdx,QWORD PTR[1+rdx] + sub ebx,1 + jnz $L$xts_enc_steal + + movdqu xmm15,XMMWORD PTR[((-16))+r13] + lea 
rcx,QWORD PTR[32+rbp] + pxor xmm15,xmm6 + lea rdx,QWORD PTR[32+rbp] + movdqa XMMWORD PTR[32+rbp],xmm15 + lea r8,QWORD PTR[r15] + call asm_AES_encrypt + pxor xmm6,XMMWORD PTR[32+rbp] + movdqu XMMWORD PTR[(-16)+r13],xmm6 + +$L$xts_enc_ret:: + lea rax,QWORD PTR[rsp] + pxor xmm0,xmm0 +$L$xts_enc_bzero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + lea rax,QWORD PTR[32+rax] + cmp rbp,rax + ja $L$xts_enc_bzero + + lea rsp,QWORD PTR[rbp] + movaps xmm6,XMMWORD PTR[64+rbp] + movaps xmm7,XMMWORD PTR[80+rbp] + movaps xmm8,XMMWORD PTR[96+rbp] + movaps xmm9,XMMWORD PTR[112+rbp] + movaps xmm10,XMMWORD PTR[128+rbp] + movaps xmm11,XMMWORD PTR[144+rbp] + movaps xmm12,XMMWORD PTR[160+rbp] + movaps xmm13,XMMWORD PTR[176+rbp] + movaps xmm14,XMMWORD PTR[192+rbp] + movaps xmm15,XMMWORD PTR[208+rbp] + lea rsp,QWORD PTR[160+rbp] + mov r15,QWORD PTR[72+rsp] + mov r14,QWORD PTR[80+rsp] + mov r13,QWORD PTR[88+rsp] + mov r12,QWORD PTR[96+rsp] + mov rbx,QWORD PTR[104+rsp] + mov rax,QWORD PTR[112+rsp] + lea rsp,QWORD PTR[120+rsp] + mov rbp,rax +$L$xts_enc_epilogue:: + DB 0F3h,0C3h ;repret +bsaes_xts_encrypt ENDP + +PUBLIC bsaes_xts_decrypt + +ALIGN 16 +bsaes_xts_decrypt PROC PUBLIC + mov rax,rsp +$L$xts_dec_prologue:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-72))+rsp] + mov r10,QWORD PTR[160+rsp] + mov r11,QWORD PTR[168+rsp] + lea rsp,QWORD PTR[((-160))+rsp] + movaps XMMWORD PTR[64+rsp],xmm6 + movaps XMMWORD PTR[80+rsp],xmm7 + movaps XMMWORD PTR[96+rsp],xmm8 + movaps XMMWORD PTR[112+rsp],xmm9 + movaps XMMWORD PTR[128+rsp],xmm10 + movaps XMMWORD PTR[144+rsp],xmm11 + movaps XMMWORD PTR[160+rsp],xmm12 + movaps XMMWORD PTR[176+rsp],xmm13 + movaps XMMWORD PTR[192+rsp],xmm14 + movaps XMMWORD PTR[208+rsp],xmm15 +$L$xts_dec_body:: + mov rbp,rsp + mov r12,rcx + mov r13,rdx + mov r14,r8 + mov r15,r9 + + lea rcx,QWORD PTR[r11] + lea rdx,QWORD PTR[32+rbp] + lea r8,QWORD PTR[r10] + call asm_AES_encrypt + + mov eax,DWORD PTR[240+r15] + mov 
rbx,r14 + + mov edx,eax + shl rax,7 + sub rax,96 + sub rsp,rax + + mov rax,rsp + mov rcx,r15 + mov r10d,edx + call _bsaes_key_convert + pxor xmm7,XMMWORD PTR[rsp] + movdqa XMMWORD PTR[rax],xmm6 + movdqa XMMWORD PTR[rsp],xmm7 + + xor eax,eax + and r14,-16 + test ebx,15 + setnz al + shl rax,4 + sub r14,rax + + sub rsp,080h + movdqa xmm6,XMMWORD PTR[32+rbp] + + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD PTR[$L$xts_magic] + pcmpgtd xmm14,xmm6 + + sub r14,080h + jc $L$xts_dec_short + jmp $L$xts_dec_loop + +ALIGN 16 +$L$xts_dec_loop:: + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm15,xmm6 + movdqa XMMWORD PTR[rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm0,xmm6 + movdqa XMMWORD PTR[16+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm7,XMMWORD PTR[r12] + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm1,xmm6 + movdqa XMMWORD PTR[32+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm15,xmm7 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm2,xmm6 + movdqa XMMWORD PTR[48+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm0,xmm8 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm3,xmm6 + movdqa XMMWORD PTR[64+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm1,xmm9 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm4,xmm6 + movdqa XMMWORD PTR[80+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm2,xmm10 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm5,xmm6 + movdqa XMMWORD PTR[96+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu 
xmm12,XMMWORD PTR[80+r12] + pxor xmm3,xmm11 + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm4,xmm12 + movdqu xmm14,XMMWORD PTR[112+r12] + lea r12,QWORD PTR[128+r12] + movdqa XMMWORD PTR[112+rsp],xmm6 + pxor xmm5,xmm13 + lea rax,QWORD PTR[128+rsp] + pxor xmm6,xmm14 + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm3,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm5 + pxor xmm1,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm3 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm1 + pxor xmm2,XMMWORD PTR[96+rsp] + movdqu XMMWORD PTR[80+r13],xmm6 + pxor xmm4,XMMWORD PTR[112+rsp] + movdqu XMMWORD PTR[96+r13],xmm2 + movdqu XMMWORD PTR[112+r13],xmm4 + lea r13,QWORD PTR[128+r13] + + movdqa xmm6,XMMWORD PTR[112+rsp] + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD PTR[$L$xts_magic] + pcmpgtd xmm14,xmm6 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + + sub r14,080h + jnc $L$xts_dec_loop + +$L$xts_dec_short:: + add r14,080h + jz $L$xts_dec_done + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm15,xmm6 + movdqa XMMWORD PTR[rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm0,xmm6 + movdqa XMMWORD PTR[16+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm7,XMMWORD PTR[r12] + cmp r14,16 + je $L$xts_dec_1 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm1,xmm6 + movdqa XMMWORD PTR[32+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm8,XMMWORD PTR[16+r12] + cmp r14,32 + je $L$xts_dec_2 + pxor xmm15,xmm7 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm2,xmm6 + movdqa XMMWORD PTR[48+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + 
pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm9,XMMWORD PTR[32+r12] + cmp r14,48 + je $L$xts_dec_3 + pxor xmm0,xmm8 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm3,xmm6 + movdqa XMMWORD PTR[64+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm10,XMMWORD PTR[48+r12] + cmp r14,64 + je $L$xts_dec_4 + pxor xmm1,xmm9 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm4,xmm6 + movdqa XMMWORD PTR[80+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm11,XMMWORD PTR[64+r12] + cmp r14,80 + je $L$xts_dec_5 + pxor xmm2,xmm10 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm5,xmm6 + movdqa XMMWORD PTR[96+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm12,XMMWORD PTR[80+r12] + cmp r14,96 + je $L$xts_dec_6 + pxor xmm3,xmm11 + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm4,xmm12 + movdqa XMMWORD PTR[112+rsp],xmm6 + lea r12,QWORD PTR[112+r12] + pxor xmm5,xmm13 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm3,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm5 + pxor xmm1,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm3 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm1 + pxor xmm2,XMMWORD PTR[96+rsp] + movdqu XMMWORD PTR[80+r13],xmm6 + movdqu XMMWORD PTR[96+r13],xmm2 + lea r13,QWORD PTR[112+r13] + + movdqa xmm6,XMMWORD PTR[112+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_6:: + pxor xmm3,xmm11 + lea r12,QWORD PTR[96+r12] + pxor xmm4,xmm12 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm3,XMMWORD PTR[48+rsp] + 
movdqu XMMWORD PTR[32+r13],xmm5 + pxor xmm1,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm3 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm1 + movdqu XMMWORD PTR[80+r13],xmm6 + lea r13,QWORD PTR[96+r13] + + movdqa xmm6,XMMWORD PTR[96+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_5:: + pxor xmm2,xmm10 + lea r12,QWORD PTR[80+r12] + pxor xmm3,xmm11 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm3,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm5 + pxor xmm1,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm3 + movdqu XMMWORD PTR[64+r13],xmm1 + lea r13,QWORD PTR[80+r13] + + movdqa xmm6,XMMWORD PTR[80+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_4:: + pxor xmm1,xmm9 + lea r12,QWORD PTR[64+r12] + pxor xmm2,xmm10 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm3,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + lea r13,QWORD PTR[64+r13] + + movdqa xmm6,XMMWORD PTR[64+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_3:: + pxor xmm0,xmm8 + lea r12,QWORD PTR[48+r12] + pxor xmm1,xmm9 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + lea r13,QWORD PTR[48+r13] + + movdqa xmm6,XMMWORD PTR[48+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_2:: + pxor xmm15,xmm7 + lea r12,QWORD PTR[32+r12] + pxor xmm0,xmm8 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor 
xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + lea r13,QWORD PTR[32+r13] + + movdqa xmm6,XMMWORD PTR[32+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_1:: + pxor xmm7,xmm15 + lea r12,QWORD PTR[16+r12] + movdqa XMMWORD PTR[32+rbp],xmm7 + lea rcx,QWORD PTR[32+rbp] + lea rdx,QWORD PTR[32+rbp] + lea r8,QWORD PTR[r15] + call asm_AES_decrypt + pxor xmm15,XMMWORD PTR[32+rbp] + + + + + + movdqu XMMWORD PTR[r13],xmm15 + lea r13,QWORD PTR[16+r13] + + movdqa xmm6,XMMWORD PTR[16+rsp] + +$L$xts_dec_done:: + and ebx,15 + jz $L$xts_dec_ret + + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD PTR[$L$xts_magic] + pcmpgtd xmm14,xmm6 + pshufd xmm13,xmm14,013h + movdqa xmm5,xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + movdqu xmm15,XMMWORD PTR[r12] + pxor xmm6,xmm13 + + lea rcx,QWORD PTR[32+rbp] + pxor xmm15,xmm6 + lea rdx,QWORD PTR[32+rbp] + movdqa XMMWORD PTR[32+rbp],xmm15 + lea r8,QWORD PTR[r15] + call asm_AES_decrypt + pxor xmm6,XMMWORD PTR[32+rbp] + mov rdx,r13 + movdqu XMMWORD PTR[r13],xmm6 + +$L$xts_dec_steal:: + movzx eax,BYTE PTR[16+r12] + movzx ecx,BYTE PTR[rdx] + lea r12,QWORD PTR[1+r12] + mov BYTE PTR[rdx],al + mov BYTE PTR[16+rdx],cl + lea rdx,QWORD PTR[1+rdx] + sub ebx,1 + jnz $L$xts_dec_steal + + movdqu xmm15,XMMWORD PTR[r13] + lea rcx,QWORD PTR[32+rbp] + pxor xmm15,xmm5 + lea rdx,QWORD PTR[32+rbp] + movdqa XMMWORD PTR[32+rbp],xmm15 + lea r8,QWORD PTR[r15] + call asm_AES_decrypt + pxor xmm5,XMMWORD PTR[32+rbp] + movdqu XMMWORD PTR[r13],xmm5 + +$L$xts_dec_ret:: + lea rax,QWORD PTR[rsp] + pxor xmm0,xmm0 +$L$xts_dec_bzero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + lea rax,QWORD PTR[32+rax] + cmp rbp,rax + ja $L$xts_dec_bzero + + lea rsp,QWORD PTR[rbp] + movaps xmm6,XMMWORD PTR[64+rbp] + movaps xmm7,XMMWORD PTR[80+rbp] + movaps xmm8,XMMWORD PTR[96+rbp] + movaps xmm9,XMMWORD PTR[112+rbp] + movaps xmm10,XMMWORD PTR[128+rbp] + movaps xmm11,XMMWORD PTR[144+rbp] + movaps xmm12,XMMWORD PTR[160+rbp] + movaps 
xmm13,XMMWORD PTR[176+rbp] + movaps xmm14,XMMWORD PTR[192+rbp] + movaps xmm15,XMMWORD PTR[208+rbp] + lea rsp,QWORD PTR[160+rbp] + mov r15,QWORD PTR[72+rsp] + mov r14,QWORD PTR[80+rsp] + mov r13,QWORD PTR[88+rsp] + mov r12,QWORD PTR[96+rsp] + mov rbx,QWORD PTR[104+rsp] + mov rax,QWORD PTR[112+rsp] + lea rsp,QWORD PTR[120+rsp] + mov rbp,rax +$L$xts_dec_epilogue:: + DB 0F3h,0C3h ;repret +bsaes_xts_decrypt ENDP + +ALIGN 64 +_bsaes_const:: +$L$M0ISR:: + DQ 00a0e0206070b0f03h,00004080c0d010509h +$L$ISRM0:: + DQ 001040b0e0205080fh,00306090c00070a0dh +$L$ISR:: + DQ 00504070602010003h,00f0e0d0c080b0a09h +$L$BS0:: + DQ 05555555555555555h,05555555555555555h +$L$BS1:: + DQ 03333333333333333h,03333333333333333h +$L$BS2:: + DQ 00f0f0f0f0f0f0f0fh,00f0f0f0f0f0f0f0fh +$L$SR:: + DQ 00504070600030201h,00f0e0d0c0a09080bh +$L$SRM0:: + DQ 00304090e00050a0fh,001060b0c0207080dh +$L$M0SR:: + DQ 00a0e02060f03070bh,00004080c05090d01h +$L$SWPUP:: + DQ 00706050403020100h,00c0d0e0f0b0a0908h +$L$SWPUPM0SR:: + DQ 00a0d02060c03070bh,00004080f05090e01h +$L$ADD1:: + DQ 00000000000000000h,00000000100000000h +$L$ADD2:: + DQ 00000000000000000h,00000000200000000h +$L$ADD3:: + DQ 00000000000000000h,00000000300000000h +$L$ADD4:: + DQ 00000000000000000h,00000000400000000h +$L$ADD5:: + DQ 00000000000000000h,00000000500000000h +$L$ADD6:: + DQ 00000000000000000h,00000000600000000h +$L$ADD7:: + DQ 00000000000000000h,00000000700000000h +$L$ADD8:: + DQ 00000000000000000h,00000000800000000h +$L$xts_magic:: + DD 087h,0,1,0 +$L$masks:: + DQ 00101010101010101h,00101010101010101h + DQ 00202020202020202h,00202020202020202h + DQ 00404040404040404h,00404040404040404h + DQ 00808080808080808h,00808080808080808h +$L$M0:: + DQ 002060a0e03070b0fh,00004080c0105090dh +$L$63:: + DQ 06363636363636363h,06363636363636363h +DB 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102 +DB 111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44 +DB 32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44 +DB 
32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32 +DB 65,110,100,121,32,80,111,108,121,97,107,111,118,0 +ALIGN 64 + +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + mov rax,QWORD PTR[160+r8] + + lea rsi,QWORD PTR[64+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + lea rax,QWORD PTR[160+rax] + + mov rbp,QWORD PTR[112+rax] + mov rbx,QWORD PTR[104+rax] + mov r12,QWORD PTR[96+rax] + mov r13,QWORD PTR[88+rax] + mov r14,QWORD PTR[80+rax] + mov r15,QWORD PTR[72+rax] + lea rax,QWORD PTR[120+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_prologue:: + mov QWORD PTR[152+r8],rax + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$cbc_dec_prologue + DD imagerel $L$cbc_dec_epilogue + DD imagerel $L$cbc_dec_info + + DD imagerel $L$ctr_enc_prologue + DD imagerel $L$ctr_enc_epilogue + 
DD imagerel $L$ctr_enc_info + + DD imagerel $L$xts_enc_prologue + DD imagerel $L$xts_enc_epilogue + DD imagerel $L$xts_enc_info + + DD imagerel $L$xts_dec_prologue + DD imagerel $L$xts_dec_epilogue + DD imagerel $L$xts_dec_info + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$cbc_dec_info:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$cbc_dec_body,imagerel $L$cbc_dec_epilogue +$L$ctr_enc_info:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$ctr_enc_body,imagerel $L$ctr_enc_epilogue +$L$xts_enc_info:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue +$L$xts_dec_info:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue + +.xdata ENDS +END diff --git a/win-x86_64/crypto/aes/vpaes-x86_64.asm b/win-x86_64/crypto/aes/vpaes-x86_64.asm new file mode 100644 index 0000000..292f64d --- /dev/null +++ b/win-x86_64/crypto/aes/vpaes-x86_64.asm @@ -0,0 +1,1143 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_encrypt_core PROC PRIVATE + mov r9,rdx + mov r11,16 + mov eax,DWORD PTR[240+rdx] + movdqa xmm1,xmm9 + movdqa xmm2,XMMWORD PTR[$L$k_ipt] + pandn xmm1,xmm0 + movdqu xmm5,XMMWORD PTR[r9] + psrld xmm1,4 + pand xmm0,xmm9 +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD PTR[(($L$k_ipt+16))] +DB 102,15,56,0,193 + pxor xmm2,xmm5 + add r9,16 + pxor xmm0,xmm2 + lea r10,QWORD PTR[$L$k_mc_backward] + jmp $L$enc_entry + +ALIGN 16 +$L$enc_loop:: + + movdqa xmm4,xmm13 + movdqa xmm0,xmm12 +DB 102,15,56,0,226 +DB 102,15,56,0,195 + pxor xmm4,xmm5 + movdqa xmm5,xmm15 + pxor xmm0,xmm4 + movdqa xmm1,XMMWORD PTR[((-64))+r10*1+r11] +DB 102,15,56,0,234 + movdqa xmm4,XMMWORD PTR[r10*1+r11] + movdqa xmm2,xmm14 +DB 102,15,56,0,211 + movdqa xmm3,xmm0 + pxor xmm2,xmm5 +DB 102,15,56,0,193 + add r9,16 + pxor xmm0,xmm2 +DB 102,15,56,0,220 + add r11,16 + pxor xmm3,xmm0 +DB 102,15,56,0,193 + and r11,030h + sub rax,1 + pxor xmm0,xmm3 + 
+$L$enc_entry:: + + movdqa xmm1,xmm9 + movdqa xmm5,xmm11 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm9 +DB 102,15,56,0,232 + movdqa xmm3,xmm10 + pxor xmm0,xmm1 +DB 102,15,56,0,217 + movdqa xmm4,xmm10 + pxor xmm3,xmm5 +DB 102,15,56,0,224 + movdqa xmm2,xmm10 + pxor xmm4,xmm5 +DB 102,15,56,0,211 + movdqa xmm3,xmm10 + pxor xmm2,xmm0 +DB 102,15,56,0,220 + movdqu xmm5,XMMWORD PTR[r9] + pxor xmm3,xmm1 + jnz $L$enc_loop + + + movdqa xmm4,XMMWORD PTR[((-96))+r10] + movdqa xmm0,XMMWORD PTR[((-80))+r10] +DB 102,15,56,0,226 + pxor xmm4,xmm5 +DB 102,15,56,0,195 + movdqa xmm1,XMMWORD PTR[64+r10*1+r11] + pxor xmm0,xmm4 +DB 102,15,56,0,193 + DB 0F3h,0C3h ;repret +_vpaes_encrypt_core ENDP + + + + + + + +ALIGN 16 +_vpaes_decrypt_core PROC PRIVATE + mov r9,rdx + mov eax,DWORD PTR[240+rdx] + movdqa xmm1,xmm9 + movdqa xmm2,XMMWORD PTR[$L$k_dipt] + pandn xmm1,xmm0 + mov r11,rax + psrld xmm1,4 + movdqu xmm5,XMMWORD PTR[r9] + shl r11,4 + pand xmm0,xmm9 +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD PTR[(($L$k_dipt+16))] + xor r11,030h + lea r10,QWORD PTR[$L$k_dsbd] +DB 102,15,56,0,193 + and r11,030h + pxor xmm2,xmm5 + movdqa xmm5,XMMWORD PTR[(($L$k_mc_forward+48))] + pxor xmm0,xmm2 + add r9,16 + add r11,r10 + jmp $L$dec_entry + +ALIGN 16 +$L$dec_loop:: + + + + movdqa xmm4,XMMWORD PTR[((-32))+r10] + movdqa xmm1,XMMWORD PTR[((-16))+r10] +DB 102,15,56,0,226 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,XMMWORD PTR[r10] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD PTR[16+r10] + +DB 102,15,56,0,226 +DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,XMMWORD PTR[32+r10] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD PTR[48+r10] + +DB 102,15,56,0,226 +DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,XMMWORD PTR[64+r10] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD PTR[80+r10] + +DB 102,15,56,0,226 +DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + add r9,16 +DB 102,15,58,15,237,12 + pxor xmm0,xmm1 + sub rax,1 + +$L$dec_entry:: + + movdqa xmm1,xmm9 + pandn xmm1,xmm0 
+ movdqa xmm2,xmm11 + psrld xmm1,4 + pand xmm0,xmm9 +DB 102,15,56,0,208 + movdqa xmm3,xmm10 + pxor xmm0,xmm1 +DB 102,15,56,0,217 + movdqa xmm4,xmm10 + pxor xmm3,xmm2 +DB 102,15,56,0,224 + pxor xmm4,xmm2 + movdqa xmm2,xmm10 +DB 102,15,56,0,211 + movdqa xmm3,xmm10 + pxor xmm2,xmm0 +DB 102,15,56,0,220 + movdqu xmm0,XMMWORD PTR[r9] + pxor xmm3,xmm1 + jnz $L$dec_loop + + + movdqa xmm4,XMMWORD PTR[96+r10] +DB 102,15,56,0,226 + pxor xmm4,xmm0 + movdqa xmm0,XMMWORD PTR[112+r10] + movdqa xmm2,XMMWORD PTR[((-352))+r11] +DB 102,15,56,0,195 + pxor xmm0,xmm4 +DB 102,15,56,0,194 + DB 0F3h,0C3h ;repret +_vpaes_decrypt_core ENDP + + + + + + + +ALIGN 16 +_vpaes_schedule_core PROC PRIVATE + + + + + + call _vpaes_preheat + movdqa xmm8,XMMWORD PTR[$L$k_rcon] + movdqu xmm0,XMMWORD PTR[rdi] + + + movdqa xmm3,xmm0 + lea r11,QWORD PTR[$L$k_ipt] + call _vpaes_schedule_transform + movdqa xmm7,xmm0 + + lea r10,QWORD PTR[$L$k_sr] + test rcx,rcx + jnz $L$schedule_am_decrypting + + + movdqu XMMWORD PTR[rdx],xmm0 + jmp $L$schedule_go + +$L$schedule_am_decrypting:: + + movdqa xmm1,XMMWORD PTR[r10*1+r8] +DB 102,15,56,0,217 + movdqu XMMWORD PTR[rdx],xmm3 + xor r8,030h + +$L$schedule_go:: + cmp esi,192 + ja $L$schedule_256 + je $L$schedule_192 + + + + + + + + + + +$L$schedule_128:: + mov esi,10 + +$L$oop_schedule_128:: + call _vpaes_schedule_round + dec rsi + jz $L$schedule_mangle_last + call _vpaes_schedule_mangle + jmp $L$oop_schedule_128 + + + + + + + + + + + + + + + + +ALIGN 16 +$L$schedule_192:: + movdqu xmm0,XMMWORD PTR[8+rdi] + call _vpaes_schedule_transform + movdqa xmm6,xmm0 + pxor xmm4,xmm4 + movhlps xmm6,xmm4 + mov esi,4 + +$L$oop_schedule_192:: + call _vpaes_schedule_round +DB 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + dec rsi + jz $L$schedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp $L$oop_schedule_192 + + + + + + + + + + + +ALIGN 16 
+$L$schedule_256:: + movdqu xmm0,XMMWORD PTR[16+rdi] + call _vpaes_schedule_transform + mov esi,7 + +$L$oop_schedule_256:: + call _vpaes_schedule_mangle + movdqa xmm6,xmm0 + + + call _vpaes_schedule_round + dec rsi + jz $L$schedule_mangle_last + call _vpaes_schedule_mangle + + + pshufd xmm0,xmm0,0FFh + movdqa xmm5,xmm7 + movdqa xmm7,xmm6 + call _vpaes_schedule_low_round + movdqa xmm7,xmm5 + + jmp $L$oop_schedule_256 + + + + + + + + + + + + +ALIGN 16 +$L$schedule_mangle_last:: + + lea r11,QWORD PTR[$L$k_deskew] + test rcx,rcx + jnz $L$schedule_mangle_last_dec + + + movdqa xmm1,XMMWORD PTR[r10*1+r8] +DB 102,15,56,0,193 + lea r11,QWORD PTR[$L$k_opt] + add rdx,32 + +$L$schedule_mangle_last_dec:: + add rdx,-16 + pxor xmm0,XMMWORD PTR[$L$k_s63] + call _vpaes_schedule_transform + movdqu XMMWORD PTR[rdx],xmm0 + + + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + DB 0F3h,0C3h ;repret +_vpaes_schedule_core ENDP + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_192_smear PROC PRIVATE + pshufd xmm1,xmm6,080h + pshufd xmm0,xmm7,0FEh + pxor xmm6,xmm1 + pxor xmm1,xmm1 + pxor xmm6,xmm0 + movdqa xmm0,xmm6 + movhlps xmm6,xmm1 + DB 0F3h,0C3h ;repret +_vpaes_schedule_192_smear ENDP + + + + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_round PROC PRIVATE + + pxor xmm1,xmm1 +DB 102,65,15,58,15,200,15 +DB 102,69,15,58,15,192,15 + pxor xmm7,xmm1 + + + pshufd xmm0,xmm0,0FFh +DB 102,15,58,15,192,1 + + + + +_vpaes_schedule_low_round:: + + movdqa xmm1,xmm7 + pslldq xmm7,4 + pxor xmm7,xmm1 + movdqa xmm1,xmm7 + pslldq xmm7,8 + pxor xmm7,xmm1 + pxor xmm7,XMMWORD PTR[$L$k_s63] + + + movdqa xmm1,xmm9 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm9 + movdqa xmm2,xmm11 +DB 102,15,56,0,208 + pxor xmm0,xmm1 + movdqa xmm3,xmm10 +DB 102,15,56,0,217 + pxor xmm3,xmm2 + movdqa xmm4,xmm10 +DB 102,15,56,0,224 + pxor xmm4,xmm2 + movdqa xmm2,xmm10 +DB 102,15,56,0,211 + pxor xmm2,xmm0 + movdqa 
xmm3,xmm10 +DB 102,15,56,0,220 + pxor xmm3,xmm1 + movdqa xmm4,xmm13 +DB 102,15,56,0,226 + movdqa xmm0,xmm12 +DB 102,15,56,0,195 + pxor xmm0,xmm4 + + + pxor xmm0,xmm7 + movdqa xmm7,xmm0 + DB 0F3h,0C3h ;repret +_vpaes_schedule_round ENDP + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_transform PROC PRIVATE + movdqa xmm1,xmm9 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm9 + movdqa xmm2,XMMWORD PTR[r11] +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD PTR[16+r11] +DB 102,15,56,0,193 + pxor xmm0,xmm2 + DB 0F3h,0C3h ;repret +_vpaes_schedule_transform ENDP + + + + + + + + + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_mangle PROC PRIVATE + movdqa xmm4,xmm0 + movdqa xmm5,XMMWORD PTR[$L$k_mc_forward] + test rcx,rcx + jnz $L$schedule_mangle_dec + + + add rdx,16 + pxor xmm4,XMMWORD PTR[$L$k_s63] +DB 102,15,56,0,229 + movdqa xmm3,xmm4 +DB 102,15,56,0,229 + pxor xmm3,xmm4 +DB 102,15,56,0,229 + pxor xmm3,xmm4 + + jmp $L$schedule_mangle_both +ALIGN 16 +$L$schedule_mangle_dec:: + + lea r11,QWORD PTR[$L$k_dksd] + movdqa xmm1,xmm9 + pandn xmm1,xmm4 + psrld xmm1,4 + pand xmm4,xmm9 + + movdqa xmm2,XMMWORD PTR[r11] +DB 102,15,56,0,212 + movdqa xmm3,XMMWORD PTR[16+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + + movdqa xmm2,XMMWORD PTR[32+r11] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD PTR[48+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + + movdqa xmm2,XMMWORD PTR[64+r11] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD PTR[80+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + + movdqa xmm2,XMMWORD PTR[96+r11] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD PTR[112+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 + + add rdx,-16 + +$L$schedule_mangle_both:: + movdqa xmm1,XMMWORD PTR[r10*1+r8] +DB 102,15,56,0,217 + add r8,-16 + and r8,030h + movdqu XMMWORD PTR[rdx],xmm3 + DB 0F3h,0C3h ;repret +_vpaes_schedule_mangle ENDP + + + + +PUBLIC vpaes_set_encrypt_key + +ALIGN 16 +vpaes_set_encrypt_key PROC 
PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_set_encrypt_key:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rsp,QWORD PTR[((-184))+rsp] + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 +$L$enc_key_body:: + mov eax,esi + shr eax,5 + add eax,5 + mov DWORD PTR[240+rdx],eax + + mov ecx,0 + mov r8d,030h + call _vpaes_schedule_core + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[184+rsp] +$L$enc_key_epilogue:: + xor eax,eax + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_set_encrypt_key:: +vpaes_set_encrypt_key ENDP + +PUBLIC vpaes_set_decrypt_key + +ALIGN 16 +vpaes_set_decrypt_key PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_set_decrypt_key:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rsp,QWORD PTR[((-184))+rsp] + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 +$L$dec_key_body:: + mov eax,esi + shr eax,5 + add eax,5 + mov DWORD PTR[240+rdx],eax + shl 
eax,4 + lea rdx,QWORD PTR[16+rax*1+rdx] + + mov ecx,1 + mov r8d,esi + shr r8d,1 + and r8d,32 + xor r8d,32 + call _vpaes_schedule_core + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[184+rsp] +$L$dec_key_epilogue:: + xor eax,eax + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_set_decrypt_key:: +vpaes_set_decrypt_key ENDP + +PUBLIC vpaes_encrypt + +ALIGN 16 +vpaes_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rsp,QWORD PTR[((-184))+rsp] + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 +$L$enc_body:: + movdqu xmm0,XMMWORD PTR[rdi] + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu XMMWORD PTR[rsi],xmm0 + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[184+rsp] +$L$enc_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_encrypt:: +vpaes_encrypt ENDP + +PUBLIC 
vpaes_decrypt + +ALIGN 16 +vpaes_decrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_decrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rsp,QWORD PTR[((-184))+rsp] + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 +$L$dec_body:: + movdqu xmm0,XMMWORD PTR[rdi] + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu XMMWORD PTR[rsi],xmm0 + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[184+rsp] +$L$dec_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_decrypt:: +vpaes_decrypt ENDP +PUBLIC vpaes_cbc_encrypt + +ALIGN 16 +vpaes_cbc_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_cbc_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + xchg rdx,rcx + sub rcx,16 + jc $L$cbc_abort + lea rsp,QWORD PTR[((-184))+rsp] + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 
+$L$cbc_body:: + movdqu xmm6,XMMWORD PTR[r8] + sub rsi,rdi + call _vpaes_preheat + cmp r9d,0 + je $L$cbc_dec_loop + jmp $L$cbc_enc_loop +ALIGN 16 +$L$cbc_enc_loop:: + movdqu xmm0,XMMWORD PTR[rdi] + pxor xmm0,xmm6 + call _vpaes_encrypt_core + movdqa xmm6,xmm0 + movdqu XMMWORD PTR[rdi*1+rsi],xmm0 + lea rdi,QWORD PTR[16+rdi] + sub rcx,16 + jnc $L$cbc_enc_loop + jmp $L$cbc_done +ALIGN 16 +$L$cbc_dec_loop:: + movdqu xmm0,XMMWORD PTR[rdi] + movdqa xmm7,xmm0 + call _vpaes_decrypt_core + pxor xmm0,xmm6 + movdqa xmm6,xmm7 + movdqu XMMWORD PTR[rdi*1+rsi],xmm0 + lea rdi,QWORD PTR[16+rdi] + sub rcx,16 + jnc $L$cbc_dec_loop +$L$cbc_done:: + movdqu XMMWORD PTR[r8],xmm6 + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[184+rsp] +$L$cbc_epilogue:: +$L$cbc_abort:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_cbc_encrypt:: +vpaes_cbc_encrypt ENDP + + + + + + + +ALIGN 16 +_vpaes_preheat PROC PRIVATE + lea r10,QWORD PTR[$L$k_s0F] + movdqa xmm10,XMMWORD PTR[((-32))+r10] + movdqa xmm11,XMMWORD PTR[((-16))+r10] + movdqa xmm9,XMMWORD PTR[r10] + movdqa xmm13,XMMWORD PTR[48+r10] + movdqa xmm12,XMMWORD PTR[64+r10] + movdqa xmm15,XMMWORD PTR[80+r10] + movdqa xmm14,XMMWORD PTR[96+r10] + DB 0F3h,0C3h ;repret +_vpaes_preheat ENDP + + + + + + +ALIGN 64 +_vpaes_consts:: +$L$k_inv:: + DQ 00E05060F0D080180h,0040703090A0B0C02h + DQ 001040A060F0B0780h,0030D0E0C02050809h + +$L$k_s0F:: + DQ 00F0F0F0F0F0F0F0Fh,00F0F0F0F0F0F0F0Fh + +$L$k_ipt:: + DQ 0C2B2E8985A2A7000h,0CABAE09052227808h + DQ 04C01307D317C4D00h,0CD80B1FCB0FDCC81h + +$L$k_sb1:: + DQ 0B19BE18FCB503E00h,0A5DF7A6E142AF544h + DQ 03618D415FAE22300h,03BF7CCC10D2ED9EFh 
+$L$k_sb2:: + DQ 0E27A93C60B712400h,05EB7E955BC982FCDh + DQ 069EB88400AE12900h,0C2A163C8AB82234Ah +$L$k_sbo:: + DQ 0D0D26D176FBDC700h,015AABF7AC502A878h + DQ 0CFE474A55FBB6A00h,08E1E90D1412B35FAh + +$L$k_mc_forward:: + DQ 00407060500030201h,00C0F0E0D080B0A09h + DQ 0080B0A0904070605h,0000302010C0F0E0Dh + DQ 00C0F0E0D080B0A09h,00407060500030201h + DQ 0000302010C0F0E0Dh,0080B0A0904070605h + +$L$k_mc_backward:: + DQ 00605040702010003h,00E0D0C0F0A09080Bh + DQ 0020100030E0D0C0Fh,00A09080B06050407h + DQ 00E0D0C0F0A09080Bh,00605040702010003h + DQ 00A09080B06050407h,0020100030E0D0C0Fh + +$L$k_sr:: + DQ 00706050403020100h,00F0E0D0C0B0A0908h + DQ 0030E09040F0A0500h,00B06010C07020D08h + DQ 00F060D040B020900h,0070E050C030A0108h + DQ 00B0E0104070A0D00h,00306090C0F020508h + +$L$k_rcon:: + DQ 01F8391B9AF9DEEB6h,0702A98084D7C7D81h + +$L$k_s63:: + DQ 05B5B5B5B5B5B5B5Bh,05B5B5B5B5B5B5B5Bh + +$L$k_opt:: + DQ 0FF9F4929D6B66000h,0F7974121DEBE6808h + DQ 001EDBD5150BCEC00h,0E10D5DB1B05C0CE0h + +$L$k_deskew:: + DQ 007E4A34047A4E300h,01DFEB95A5DBEF91Ah + DQ 05F36B5DC83EA6900h,02841C2ABF49D1E77h + + + + + +$L$k_dksd:: + DQ 0FEB91A5DA3E44700h,00740E3A45A1DBEF9h + DQ 041C277F4B5368300h,05FDC69EAAB289D1Eh +$L$k_dksb:: + DQ 09A4FCA1F8550D500h,003D653861CC94C99h + DQ 0115BEDA7B6FC4A00h,0D993256F7E3482C8h +$L$k_dkse:: + DQ 0D5031CCA1FC9D600h,053859A4C994F5086h + DQ 0A23196054FDC7BE8h,0CD5EF96A20B31487h +$L$k_dks9:: + DQ 0B6116FC87ED9A700h,04AED933482255BFCh + DQ 04576516227143300h,08BB89FACE9DAFDCEh + + + + + +$L$k_dipt:: + DQ 00F505B040B545F00h,0154A411E114E451Ah + DQ 086E383E660056500h,012771772F491F194h + +$L$k_dsb9:: + DQ 0851C03539A86D600h,0CAD51F504F994CC9h + DQ 0C03B1789ECD74900h,0725E2C9EB2FBA565h +$L$k_dsbd:: + DQ 07D57CCDFE6B1A200h,0F56E9B13882A4439h + DQ 03CE2FAF724C6CB00h,02931180D15DEEFD3h +$L$k_dsbb:: + DQ 0D022649296B44200h,0602646F6B0F2D404h + DQ 0C19498A6CD596700h,0F3FF0C3E3255AA6Bh +$L$k_dsbe:: + DQ 046F2929626D4D000h,02242600464B4F6B0h + DQ 00C55A6CDFFAAC100h,09467F36B98593E32h 
+$L$k_dsbo:: + DQ 01387EA537EF94000h,0C7AA6DB9D4943E2Dh + DQ 012D7560F93441D00h,0CA4B8159D8C58E9Ch +DB 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 +DB 111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54 +DB 52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97 +DB 109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32 +DB 85,110,105,118,101,114,115,105,116,121,41,0 +ALIGN 64 + +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + lea rsi,QWORD PTR[16+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + lea rax,QWORD PTR[184+rax] + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_vpaes_set_encrypt_key + DD imagerel $L$SEH_end_vpaes_set_encrypt_key + DD imagerel $L$SEH_info_vpaes_set_encrypt_key + + DD imagerel 
$L$SEH_begin_vpaes_set_decrypt_key + DD imagerel $L$SEH_end_vpaes_set_decrypt_key + DD imagerel $L$SEH_info_vpaes_set_decrypt_key + + DD imagerel $L$SEH_begin_vpaes_encrypt + DD imagerel $L$SEH_end_vpaes_encrypt + DD imagerel $L$SEH_info_vpaes_encrypt + + DD imagerel $L$SEH_begin_vpaes_decrypt + DD imagerel $L$SEH_end_vpaes_decrypt + DD imagerel $L$SEH_info_vpaes_decrypt + + DD imagerel $L$SEH_begin_vpaes_cbc_encrypt + DD imagerel $L$SEH_end_vpaes_cbc_encrypt + DD imagerel $L$SEH_info_vpaes_cbc_encrypt + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_vpaes_set_encrypt_key:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$enc_key_body,imagerel $L$enc_key_epilogue +$L$SEH_info_vpaes_set_decrypt_key:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$dec_key_body,imagerel $L$dec_key_epilogue +$L$SEH_info_vpaes_encrypt:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$enc_body,imagerel $L$enc_epilogue +$L$SEH_info_vpaes_decrypt:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$dec_body,imagerel $L$dec_epilogue +$L$SEH_info_vpaes_cbc_encrypt:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$cbc_body,imagerel $L$cbc_epilogue + +.xdata ENDS +END diff --git a/win-x86_64/crypto/bn/modexp512-x86_64.asm b/win-x86_64/crypto/bn/modexp512-x86_64.asm new file mode 100644 index 0000000..d3e4a61 --- /dev/null +++ b/win-x86_64/crypto/bn/modexp512-x86_64.asm @@ -0,0 +1,1887 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + + +ALIGN 16 +MULADD_128x512 PROC PRIVATE + mov rax,QWORD PTR[rsi] + mul rbp + add r8,rax + adc rdx,0 + mov QWORD PTR[rcx],r8 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r12,rax + adc 
rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov r8,rdx + mov rbp,QWORD PTR[8+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r9,rax + adc rdx,0 + mov QWORD PTR[8+rcx],r9 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov r9,rdx + DB 0F3h,0C3h ;repret +MULADD_128x512 ENDP + +ALIGN 16 +mont_reduce PROC PRIVATE + lea rdi,QWORD PTR[192+rsp] + mov rsi,QWORD PTR[32+rsp] + add rsi,576 + lea rcx,QWORD PTR[520+rsp] + + mov rbp,QWORD PTR[96+rcx] + mov rax,QWORD PTR[rsi] + mul rbp + mov r8,QWORD PTR[rcx] + add r8,rax + adc rdx,0 + mov QWORD PTR[rdi],r8 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + mov r9,QWORD PTR[8+rcx] + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + mov r10,QWORD PTR[16+rcx] + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + mov r11,QWORD PTR[24+rcx] + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp 
+ mov r12,QWORD PTR[32+rcx] + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + mov r13,QWORD PTR[40+rcx] + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + mov r14,QWORD PTR[48+rcx] + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + mov r15,QWORD PTR[56+rcx] + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov r8,rdx + mov rbp,QWORD PTR[104+rcx] + mov rax,QWORD PTR[rsi] + mul rbp + add r9,rax + adc rdx,0 + mov QWORD PTR[8+rdi],r9 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov r9,rdx + mov rbp,QWORD PTR[112+rcx] + mov rax,QWORD PTR[rsi] + mul rbp + add r10,rax + adc rdx,0 + mov QWORD PTR[16+rdi],r10 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + 
adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov r10,rdx + mov rbp,QWORD PTR[120+rcx] + mov rax,QWORD PTR[rsi] + mul rbp + add r11,rax + adc rdx,0 + mov QWORD PTR[24+rdi],r11 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov r11,rdx + xor rax,rax + + add r8,QWORD PTR[64+rcx] + adc r9,QWORD PTR[72+rcx] + adc r10,QWORD PTR[80+rcx] + adc r11,QWORD PTR[88+rcx] + adc rax,0 + + + + + mov QWORD PTR[64+rdi],r8 + mov QWORD PTR[72+rdi],r9 + mov rbp,r10 + mov QWORD PTR[88+rdi],r11 + + mov QWORD PTR[384+rsp],rax + + mov r8,QWORD PTR[rdi] + mov r9,QWORD PTR[8+rdi] + mov r10,QWORD PTR[16+rdi] + mov r11,QWORD PTR[24+rdi] + + + + + + + + + add rdi,8*10 + + add rsi,64 + lea rcx,QWORD PTR[296+rsp] + + call MULADD_128x512 + + mov rax,QWORD PTR[384+rsp] + + + add r8,QWORD PTR[((-16))+rdi] + adc r9,QWORD PTR[((-8))+rdi] + mov QWORD PTR[64+rcx],r8 + mov QWORD PTR[72+rcx],r9 + + adc rax,rax + mov QWORD PTR[384+rsp],rax + + lea rdi,QWORD PTR[192+rsp] + add rsi,64 + + + + + + mov r8,QWORD PTR[rsi] + mov rbx,QWORD PTR[8+rsi] + + mov rax,QWORD PTR[rcx] + mul r8 + mov rbp,rax + mov r9,rdx + + mov rax,QWORD PTR[8+rcx] + mul r8 + add r9,rax + 
+ mov rax,QWORD PTR[rcx] + mul rbx + add r9,rax + + mov QWORD PTR[8+rdi],r9 + + + sub rsi,192 + + mov r8,QWORD PTR[rcx] + mov r9,QWORD PTR[8+rcx] + + call MULADD_128x512 + + + + + mov rax,QWORD PTR[rsi] + mov rbx,QWORD PTR[8+rsi] + mov rdi,QWORD PTR[16+rsi] + mov rdx,QWORD PTR[24+rsi] + + + mov rbp,QWORD PTR[384+rsp] + + add r8,QWORD PTR[64+rcx] + adc r9,QWORD PTR[72+rcx] + + + adc rbp,rbp + + + + shl rbp,3 + mov rcx,QWORD PTR[32+rsp] + add rbp,rcx + + + xor rsi,rsi + + add r10,QWORD PTR[rbp] + adc r11,QWORD PTR[64+rbp] + adc r12,QWORD PTR[128+rbp] + adc r13,QWORD PTR[192+rbp] + adc r14,QWORD PTR[256+rbp] + adc r15,QWORD PTR[320+rbp] + adc r8,QWORD PTR[384+rbp] + adc r9,QWORD PTR[448+rbp] + + + + sbb rsi,0 + + + and rax,rsi + and rbx,rsi + and rdi,rsi + and rdx,rsi + + mov rbp,1 + sub r10,rax + sbb r11,rbx + sbb r12,rdi + sbb r13,rdx + + + + + sbb rbp,0 + + + + add rcx,512 + mov rax,QWORD PTR[32+rcx] + mov rbx,QWORD PTR[40+rcx] + mov rdi,QWORD PTR[48+rcx] + mov rdx,QWORD PTR[56+rcx] + + + + and rax,rsi + and rbx,rsi + and rdi,rsi + and rdx,rsi + + + + sub rbp,1 + + sbb r14,rax + sbb r15,rbx + sbb r8,rdi + sbb r9,rdx + + + + mov rsi,QWORD PTR[144+rsp] + mov QWORD PTR[rsi],r10 + mov QWORD PTR[8+rsi],r11 + mov QWORD PTR[16+rsi],r12 + mov QWORD PTR[24+rsi],r13 + mov QWORD PTR[32+rsi],r14 + mov QWORD PTR[40+rsi],r15 + mov QWORD PTR[48+rsi],r8 + mov QWORD PTR[56+rsi],r9 + + DB 0F3h,0C3h ;repret +mont_reduce ENDP + +ALIGN 16 +mont_mul_a3b PROC PRIVATE + + + + + mov rbp,QWORD PTR[rdi] + + mov rax,r10 + mul rbp + mov QWORD PTR[520+rsp],rax + mov r10,rdx + mov rax,r11 + mul rbp + add r10,rax + adc rdx,0 + mov r11,rdx + mov rax,r12 + mul rbp + add r11,rax + adc rdx,0 + mov r12,rdx + mov rax,r13 + mul rbp + add r12,rax + adc rdx,0 + mov r13,rdx + mov rax,r14 + mul rbp + add r13,rax + adc rdx,0 + mov r14,rdx + mov rax,r15 + mul rbp + add r14,rax + adc rdx,0 + mov r15,rdx + mov rax,r8 + mul rbp + add r15,rax + adc rdx,0 + mov r8,rdx + mov rax,r9 + mul rbp + add r8,rax + adc 
rdx,0 + mov r9,rdx + mov rbp,QWORD PTR[8+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r10,rax + adc rdx,0 + mov QWORD PTR[528+rsp],r10 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov r10,rdx + mov rbp,QWORD PTR[16+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r11,rax + adc rdx,0 + mov QWORD PTR[536+rsp],r11 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov r11,rdx + mov rbp,QWORD PTR[24+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r12,rax + adc rdx,0 + mov QWORD PTR[544+rsp],r12 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + 
mov rax,QWORD PTR[16+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov r12,rdx + mov rbp,QWORD PTR[32+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r13,rax + adc rdx,0 + mov QWORD PTR[552+rsp],r13 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov r13,rdx + mov rbp,QWORD PTR[40+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r14,rax + adc rdx,0 + mov QWORD PTR[560+rsp],r14 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r10,rax + 
adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov r14,rdx + mov rbp,QWORD PTR[48+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r15,rax + adc rdx,0 + mov QWORD PTR[568+rsp],r15 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov r15,rdx + mov rbp,QWORD PTR[56+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r8,rax + adc rdx,0 + mov QWORD PTR[576+rsp],r8 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx 
+ + mov rax,QWORD PTR[56+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov r8,rdx + mov QWORD PTR[584+rsp],r9 + mov QWORD PTR[592+rsp],r10 + mov QWORD PTR[600+rsp],r11 + mov QWORD PTR[608+rsp],r12 + mov QWORD PTR[616+rsp],r13 + mov QWORD PTR[624+rsp],r14 + mov QWORD PTR[632+rsp],r15 + mov QWORD PTR[640+rsp],r8 + + + + + + jmp mont_reduce + + +mont_mul_a3b ENDP + +ALIGN 16 +sqr_reduce PROC PRIVATE + mov rcx,QWORD PTR[16+rsp] + + + + mov rbx,r10 + + mov rax,r11 + mul rbx + mov QWORD PTR[528+rsp],rax + mov r10,rdx + mov rax,r12 + mul rbx + add r10,rax + adc rdx,0 + mov r11,rdx + mov rax,r13 + mul rbx + add r11,rax + adc rdx,0 + mov r12,rdx + mov rax,r14 + mul rbx + add r12,rax + adc rdx,0 + mov r13,rdx + mov rax,r15 + mul rbx + add r13,rax + adc rdx,0 + mov r14,rdx + mov rax,r8 + mul rbx + add r14,rax + adc rdx,0 + mov r15,rdx + mov rax,r9 + mul rbx + add r15,rax + adc rdx,0 + mov rsi,rdx + + mov QWORD PTR[536+rsp],r10 + + + + + + mov rbx,QWORD PTR[8+rcx] + + mov rax,QWORD PTR[16+rcx] + mul rbx + add r11,rax + adc rdx,0 + mov QWORD PTR[544+rsp],r11 + + mov r10,rdx + mov rax,QWORD PTR[24+rcx] + mul rbx + add r12,rax + adc rdx,0 + add r12,r10 + adc rdx,0 + mov QWORD PTR[552+rsp],r12 + + mov r10,rdx + mov rax,QWORD PTR[32+rcx] + mul rbx + add r13,rax + adc rdx,0 + add r13,r10 + adc rdx,0 + + mov r10,rdx + mov rax,QWORD PTR[40+rcx] + mul rbx + add r14,rax + adc rdx,0 + add r14,r10 + adc rdx,0 + + mov r10,rdx + mov rax,r8 + mul rbx + add r15,rax + adc rdx,0 + add r15,r10 + adc rdx,0 + + mov r10,rdx + mov rax,r9 + mul rbx + add rsi,rax + adc rdx,0 + add rsi,r10 + adc rdx,0 + + mov r11,rdx + + + + + mov rbx,QWORD PTR[16+rcx] + + mov rax,QWORD PTR[24+rcx] + mul rbx + add r13,rax + adc rdx,0 + mov QWORD PTR[560+rsp],r13 + + mov r10,rdx + mov rax,QWORD PTR[32+rcx] + mul rbx + add r14,rax + adc rdx,0 + add r14,r10 + adc rdx,0 + mov QWORD PTR[568+rsp],r14 + + mov r10,rdx + mov rax,QWORD PTR[40+rcx] + mul rbx + add r15,rax + adc rdx,0 + add r15,r10 + adc 
rdx,0 + + mov r10,rdx + mov rax,r8 + mul rbx + add rsi,rax + adc rdx,0 + add rsi,r10 + adc rdx,0 + + mov r10,rdx + mov rax,r9 + mul rbx + add r11,rax + adc rdx,0 + add r11,r10 + adc rdx,0 + + mov r12,rdx + + + + + + mov rbx,QWORD PTR[24+rcx] + + mov rax,QWORD PTR[32+rcx] + mul rbx + add r15,rax + adc rdx,0 + mov QWORD PTR[576+rsp],r15 + + mov r10,rdx + mov rax,QWORD PTR[40+rcx] + mul rbx + add rsi,rax + adc rdx,0 + add rsi,r10 + adc rdx,0 + mov QWORD PTR[584+rsp],rsi + + mov r10,rdx + mov rax,r8 + mul rbx + add r11,rax + adc rdx,0 + add r11,r10 + adc rdx,0 + + mov r10,rdx + mov rax,r9 + mul rbx + add r12,rax + adc rdx,0 + add r12,r10 + adc rdx,0 + + mov r15,rdx + + + + + mov rbx,QWORD PTR[32+rcx] + + mov rax,QWORD PTR[40+rcx] + mul rbx + add r11,rax + adc rdx,0 + mov QWORD PTR[592+rsp],r11 + + mov r10,rdx + mov rax,r8 + mul rbx + add r12,rax + adc rdx,0 + add r12,r10 + adc rdx,0 + mov QWORD PTR[600+rsp],r12 + + mov r10,rdx + mov rax,r9 + mul rbx + add r15,rax + adc rdx,0 + add r15,r10 + adc rdx,0 + + mov r11,rdx + + + + + mov rbx,QWORD PTR[40+rcx] + + mov rax,r8 + mul rbx + add r15,rax + adc rdx,0 + mov QWORD PTR[608+rsp],r15 + + mov r10,rdx + mov rax,r9 + mul rbx + add r11,rax + adc rdx,0 + add r11,r10 + adc rdx,0 + mov QWORD PTR[616+rsp],r11 + + mov r12,rdx + + + + + mov rbx,r8 + + mov rax,r9 + mul rbx + add r12,rax + adc rdx,0 + mov QWORD PTR[624+rsp],r12 + + mov QWORD PTR[632+rsp],rdx + + + mov r10,QWORD PTR[528+rsp] + mov r11,QWORD PTR[536+rsp] + mov r12,QWORD PTR[544+rsp] + mov r13,QWORD PTR[552+rsp] + mov r14,QWORD PTR[560+rsp] + mov r15,QWORD PTR[568+rsp] + + mov rax,QWORD PTR[24+rcx] + mul rax + mov rdi,rax + mov r8,rdx + + add r10,r10 + adc r11,r11 + adc r12,r12 + adc r13,r13 + adc r14,r14 + adc r15,r15 + adc r8,0 + + mov rax,QWORD PTR[rcx] + mul rax + mov QWORD PTR[520+rsp],rax + mov rbx,rdx + + mov rax,QWORD PTR[8+rcx] + mul rax + + add r10,rbx + adc r11,rax + adc rdx,0 + + mov rbx,rdx + mov QWORD PTR[528+rsp],r10 + mov QWORD PTR[536+rsp],r11 + + mov 
rax,QWORD PTR[16+rcx] + mul rax + + add r12,rbx + adc r13,rax + adc rdx,0 + + mov rbx,rdx + + mov QWORD PTR[544+rsp],r12 + mov QWORD PTR[552+rsp],r13 + + xor rbp,rbp + add r14,rbx + adc r15,rdi + adc rbp,0 + + mov QWORD PTR[560+rsp],r14 + mov QWORD PTR[568+rsp],r15 + + + + + mov r10,QWORD PTR[576+rsp] + mov r11,QWORD PTR[584+rsp] + mov r12,QWORD PTR[592+rsp] + mov r13,QWORD PTR[600+rsp] + mov r14,QWORD PTR[608+rsp] + mov r15,QWORD PTR[616+rsp] + mov rdi,QWORD PTR[624+rsp] + mov rsi,QWORD PTR[632+rsp] + + mov rax,r9 + mul rax + mov r9,rax + mov rbx,rdx + + add r10,r10 + adc r11,r11 + adc r12,r12 + adc r13,r13 + adc r14,r14 + adc r15,r15 + adc rdi,rdi + adc rsi,rsi + adc rbx,0 + + add r10,rbp + + mov rax,QWORD PTR[32+rcx] + mul rax + + add r10,r8 + adc r11,rax + adc rdx,0 + + mov rbp,rdx + + mov QWORD PTR[576+rsp],r10 + mov QWORD PTR[584+rsp],r11 + + mov rax,QWORD PTR[40+rcx] + mul rax + + add r12,rbp + adc r13,rax + adc rdx,0 + + mov rbp,rdx + + mov QWORD PTR[592+rsp],r12 + mov QWORD PTR[600+rsp],r13 + + mov rax,QWORD PTR[48+rcx] + mul rax + + add r14,rbp + adc r15,rax + adc rdx,0 + + mov QWORD PTR[608+rsp],r14 + mov QWORD PTR[616+rsp],r15 + + add rdi,rdx + adc rsi,r9 + adc rbx,0 + + mov QWORD PTR[624+rsp],rdi + mov QWORD PTR[632+rsp],rsi + mov QWORD PTR[640+rsp],rbx + + jmp mont_reduce + + +sqr_reduce ENDP +PUBLIC mod_exp_512 + +mod_exp_512 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_mod_exp_512:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + + + mov r8,rsp + sub rsp,2688 + and rsp,-64 + + + mov QWORD PTR[rsp],r8 + mov QWORD PTR[8+rsp],rdi + mov QWORD PTR[16+rsp],rsi + mov QWORD PTR[24+rsp],rcx +$L$body:: + + + + pxor xmm4,xmm4 + movdqu xmm0,XMMWORD PTR[rsi] + movdqu xmm1,XMMWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[32+rsi] + movdqu xmm3,XMMWORD PTR[48+rsi] + movdqa XMMWORD PTR[512+rsp],xmm4 + movdqa XMMWORD 
PTR[528+rsp],xmm4 + movdqa XMMWORD PTR[608+rsp],xmm4 + movdqa XMMWORD PTR[624+rsp],xmm4 + movdqa XMMWORD PTR[544+rsp],xmm0 + movdqa XMMWORD PTR[560+rsp],xmm1 + movdqa XMMWORD PTR[576+rsp],xmm2 + movdqa XMMWORD PTR[592+rsp],xmm3 + + + movdqu xmm0,XMMWORD PTR[rdx] + movdqu xmm1,XMMWORD PTR[16+rdx] + movdqu xmm2,XMMWORD PTR[32+rdx] + movdqu xmm3,XMMWORD PTR[48+rdx] + + lea rbx,QWORD PTR[384+rsp] + mov QWORD PTR[136+rsp],rbx + call mont_reduce + + + lea rcx,QWORD PTR[448+rsp] + xor rax,rax + mov QWORD PTR[rcx],rax + mov QWORD PTR[8+rcx],rax + mov QWORD PTR[24+rcx],rax + mov QWORD PTR[32+rcx],rax + mov QWORD PTR[40+rcx],rax + mov QWORD PTR[48+rcx],rax + mov QWORD PTR[56+rcx],rax + mov QWORD PTR[128+rsp],rax + mov QWORD PTR[16+rcx],1 + + lea rbp,QWORD PTR[640+rsp] + mov rsi,rcx + mov rdi,rbp + mov rax,8 +loop_0:: + mov rbx,QWORD PTR[rcx] + mov WORD PTR[rdi],bx + shr rbx,16 + mov WORD PTR[64+rdi],bx + shr rbx,16 + mov WORD PTR[128+rdi],bx + shr rbx,16 + mov WORD PTR[192+rdi],bx + lea rcx,QWORD PTR[8+rcx] + lea rdi,QWORD PTR[256+rdi] + dec rax + jnz loop_0 + mov rax,31 + mov QWORD PTR[32+rsp],rax + mov QWORD PTR[40+rsp],rbp + + mov QWORD PTR[136+rsp],rsi + mov r10,QWORD PTR[rsi] + mov r11,QWORD PTR[8+rsi] + mov r12,QWORD PTR[16+rsi] + mov r13,QWORD PTR[24+rsi] + mov r14,QWORD PTR[32+rsi] + mov r15,QWORD PTR[40+rsi] + mov r8,QWORD PTR[48+rsi] + mov r9,QWORD PTR[56+rsi] +init_loop:: + lea rdi,QWORD PTR[384+rsp] + call mont_mul_a3b + lea rsi,QWORD PTR[448+rsp] + mov rbp,QWORD PTR[40+rsp] + add rbp,2 + mov QWORD PTR[40+rsp],rbp + mov rcx,rsi + mov rax,8 +loop_1:: + mov rbx,QWORD PTR[rcx] + mov WORD PTR[rbp],bx + shr rbx,16 + mov WORD PTR[64+rbp],bx + shr rbx,16 + mov WORD PTR[128+rbp],bx + shr rbx,16 + mov WORD PTR[192+rbp],bx + lea rcx,QWORD PTR[8+rcx] + lea rbp,QWORD PTR[256+rbp] + dec rax + jnz loop_1 + mov rax,QWORD PTR[32+rsp] + sub rax,1 + mov QWORD PTR[32+rsp],rax + jne init_loop + + + + movdqa XMMWORD PTR[64+rsp],xmm0 + movdqa XMMWORD PTR[80+rsp],xmm1 + movdqa XMMWORD 
PTR[96+rsp],xmm2 + movdqa XMMWORD PTR[112+rsp],xmm3 + + + + + + mov eax,DWORD PTR[126+rsp] + mov rdx,rax + shr rax,11 + and edx,007FFh + mov DWORD PTR[126+rsp],edx + lea rsi,QWORD PTR[640+rax*2+rsp] + mov rdx,QWORD PTR[8+rsp] + mov rbp,4 +loop_2:: + movzx rbx,WORD PTR[192+rsi] + movzx rax,WORD PTR[448+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[128+rsi] + mov ax,WORD PTR[384+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[64+rsi] + mov ax,WORD PTR[320+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[rsi] + mov ax,WORD PTR[256+rsi] + mov QWORD PTR[rdx],rbx + mov QWORD PTR[8+rdx],rax + lea rsi,QWORD PTR[512+rsi] + lea rdx,QWORD PTR[16+rdx] + sub rbp,1 + jnz loop_2 + mov QWORD PTR[48+rsp],505 + + mov rcx,QWORD PTR[8+rsp] + mov QWORD PTR[136+rsp],rcx + mov r10,QWORD PTR[rcx] + mov r11,QWORD PTR[8+rcx] + mov r12,QWORD PTR[16+rcx] + mov r13,QWORD PTR[24+rcx] + mov r14,QWORD PTR[32+rcx] + mov r15,QWORD PTR[40+rcx] + mov r8,QWORD PTR[48+rcx] + mov r9,QWORD PTR[56+rcx] + jmp sqr_2 + +main_loop_a3b:: + call sqr_reduce + call sqr_reduce + call sqr_reduce +sqr_2:: + call sqr_reduce + call sqr_reduce + + + + mov rcx,QWORD PTR[48+rsp] + mov rax,rcx + shr rax,4 + mov edx,DWORD PTR[64+rax*2+rsp] + and rcx,15 + shr rdx,cl + and rdx,01Fh + + lea rsi,QWORD PTR[640+rdx*2+rsp] + lea rdx,QWORD PTR[448+rsp] + mov rdi,rdx + mov rbp,4 +loop_3:: + movzx rbx,WORD PTR[192+rsi] + movzx rax,WORD PTR[448+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[128+rsi] + mov ax,WORD PTR[384+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[64+rsi] + mov ax,WORD PTR[320+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[rsi] + mov ax,WORD PTR[256+rsi] + mov QWORD PTR[rdx],rbx + mov QWORD PTR[8+rdx],rax + lea rsi,QWORD PTR[512+rsi] + lea rdx,QWORD PTR[16+rdx] + sub rbp,1 + jnz loop_3 + mov rsi,QWORD PTR[8+rsp] + call mont_mul_a3b + + + + mov rcx,QWORD PTR[48+rsp] + sub rcx,5 + mov QWORD PTR[48+rsp],rcx + jge main_loop_a3b + + + +end_main_loop_a3b:: + + + mov rdx,QWORD PTR[8+rsp] + pxor xmm4,xmm4 + 
movdqu xmm0,XMMWORD PTR[rdx] + movdqu xmm1,XMMWORD PTR[16+rdx] + movdqu xmm2,XMMWORD PTR[32+rdx] + movdqu xmm3,XMMWORD PTR[48+rdx] + movdqa XMMWORD PTR[576+rsp],xmm4 + movdqa XMMWORD PTR[592+rsp],xmm4 + movdqa XMMWORD PTR[608+rsp],xmm4 + movdqa XMMWORD PTR[624+rsp],xmm4 + movdqa XMMWORD PTR[512+rsp],xmm0 + movdqa XMMWORD PTR[528+rsp],xmm1 + movdqa XMMWORD PTR[544+rsp],xmm2 + movdqa XMMWORD PTR[560+rsp],xmm3 + call mont_reduce + + + + mov rax,QWORD PTR[8+rsp] + mov r8,QWORD PTR[rax] + mov r9,QWORD PTR[8+rax] + mov r10,QWORD PTR[16+rax] + mov r11,QWORD PTR[24+rax] + mov r12,QWORD PTR[32+rax] + mov r13,QWORD PTR[40+rax] + mov r14,QWORD PTR[48+rax] + mov r15,QWORD PTR[56+rax] + + + mov rbx,QWORD PTR[24+rsp] + add rbx,512 + + sub r8,QWORD PTR[rbx] + sbb r9,QWORD PTR[8+rbx] + sbb r10,QWORD PTR[16+rbx] + sbb r11,QWORD PTR[24+rbx] + sbb r12,QWORD PTR[32+rbx] + sbb r13,QWORD PTR[40+rbx] + sbb r14,QWORD PTR[48+rbx] + sbb r15,QWORD PTR[56+rbx] + + + mov rsi,QWORD PTR[rax] + mov rdi,QWORD PTR[8+rax] + mov rcx,QWORD PTR[16+rax] + mov rdx,QWORD PTR[24+rax] + cmovnc rsi,r8 + cmovnc rdi,r9 + cmovnc rcx,r10 + cmovnc rdx,r11 + mov QWORD PTR[rax],rsi + mov QWORD PTR[8+rax],rdi + mov QWORD PTR[16+rax],rcx + mov QWORD PTR[24+rax],rdx + + mov rsi,QWORD PTR[32+rax] + mov rdi,QWORD PTR[40+rax] + mov rcx,QWORD PTR[48+rax] + mov rdx,QWORD PTR[56+rax] + cmovnc rsi,r12 + cmovnc rdi,r13 + cmovnc rcx,r14 + cmovnc rdx,r15 + mov QWORD PTR[32+rax],rsi + mov QWORD PTR[40+rax],rdi + mov QWORD PTR[48+rax],rcx + mov QWORD PTR[56+rax],rdx + + mov rsi,QWORD PTR[rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbx,QWORD PTR[32+rsi] + mov rbp,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_mod_exp_512:: +mod_exp_512 ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +mod_exp_512_se_handler PROC PRIVATE + push rsi + 
push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$body] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jae $L$in_prologue + + mov rax,QWORD PTR[rax] + + mov rbx,QWORD PTR[32+rax] + mov rbp,QWORD PTR[40+rax] + mov r12,QWORD PTR[24+rax] + mov r13,QWORD PTR[16+rax] + mov r14,QWORD PTR[8+rax] + mov r15,QWORD PTR[rax] + lea rax,QWORD PTR[48+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +mod_exp_512_se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_mod_exp_512 + DD imagerel $L$SEH_end_mod_exp_512 + DD imagerel $L$SEH_info_mod_exp_512 + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_mod_exp_512:: +DB 9,0,0,0 + DD imagerel mod_exp_512_se_handler + +.xdata ENDS +END diff --git a/win-x86_64/crypto/bn/rsaz-avx2.asm b/win-x86_64/crypto/bn/rsaz-avx2.asm new file mode 100644 index 0000000..f9188f5 --- /dev/null +++ b/win-x86_64/crypto/bn/rsaz-avx2.asm @@ -0,0 +1,29 @@ +OPTION DOTNAME 
+.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC rsaz_avx2_eligible + +rsaz_avx2_eligible PROC PUBLIC + xor eax,eax + DB 0F3h,0C3h ;repret +rsaz_avx2_eligible ENDP + +PUBLIC rsaz_1024_sqr_avx2 +PUBLIC rsaz_1024_mul_avx2 +PUBLIC rsaz_1024_norm2red_avx2 +PUBLIC rsaz_1024_red2norm_avx2 +PUBLIC rsaz_1024_scatter5_avx2 +PUBLIC rsaz_1024_gather5_avx2 + +rsaz_1024_sqr_avx2 PROC PUBLIC +rsaz_1024_mul_avx2:: +rsaz_1024_norm2red_avx2:: +rsaz_1024_red2norm_avx2:: +rsaz_1024_scatter5_avx2:: +rsaz_1024_gather5_avx2:: +DB 00fh,00bh + DB 0F3h,0C3h ;repret +rsaz_1024_sqr_avx2 ENDP + +.text$ ENDS +END diff --git a/win-x86_64/crypto/bn/rsaz-x86_64.asm b/win-x86_64/crypto/bn/rsaz-x86_64.asm new file mode 100644 index 0000000..86e828d --- /dev/null +++ b/win-x86_64/crypto/bn/rsaz-x86_64.asm @@ -0,0 +1,1326 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC rsaz_512_sqr + +ALIGN 32 +rsaz_512_sqr PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_sqr:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + sub rsp,128+24 +$L$sqr_body:: + mov rbp,rdx + mov rdx,QWORD PTR[rsi] + mov rax,QWORD PTR[8+rsi] + mov QWORD PTR[128+rsp],rcx + jmp $L$oop_sqr + +ALIGN 32 +$L$oop_sqr:: + mov DWORD PTR[((128+8))+rsp],r8d + + mov rbx,rdx + mul rdx + mov r8,rax + mov rax,QWORD PTR[16+rsi] + mov r9,rdx + + mul rbx + add r9,rax + mov rax,QWORD PTR[24+rsi] + mov r10,rdx + adc r10,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[32+rsi] + mov r11,rdx + adc r11,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[40+rsi] + mov r12,rdx + adc r12,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[48+rsi] + mov r13,rdx + adc r13,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[56+rsi] + mov r14,rdx + adc r14,0 + + mul rbx + add r14,rax + mov rax,rbx + mov r15,rdx + adc r15,0 + + add r8,r8 + mov rcx,r9 + adc r9,r9 + + 
mul rax + mov QWORD PTR[rsp],rax + add r8,rdx + adc r9,0 + + mov QWORD PTR[8+rsp],r8 + shr rcx,63 + + + mov r8,QWORD PTR[8+rsi] + mov rax,QWORD PTR[16+rsi] + mul r8 + add r10,rax + mov rax,QWORD PTR[24+rsi] + mov rbx,rdx + adc rbx,0 + + mul r8 + add r11,rax + mov rax,QWORD PTR[32+rsi] + adc rdx,0 + add r11,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r12,rax + mov rax,QWORD PTR[40+rsi] + adc rdx,0 + add r12,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r13,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r13,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r14,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r14,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r15,rax + mov rax,r8 + adc rdx,0 + add r15,rbx + mov r8,rdx + mov rdx,r10 + adc r8,0 + + add rdx,rdx + lea r10,QWORD PTR[r10*2+rcx] + mov rbx,r11 + adc r11,r11 + + mul rax + add r9,rax + adc r10,rdx + adc r11,0 + + mov QWORD PTR[16+rsp],r9 + mov QWORD PTR[24+rsp],r10 + shr rbx,63 + + + mov r9,QWORD PTR[16+rsi] + mov rax,QWORD PTR[24+rsi] + mul r9 + add r12,rax + mov rax,QWORD PTR[32+rsi] + mov rcx,rdx + adc rcx,0 + + mul r9 + add r13,rax + mov rax,QWORD PTR[40+rsi] + adc rdx,0 + add r13,rcx + mov rcx,rdx + adc rcx,0 + + mul r9 + add r14,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r14,rcx + mov rcx,rdx + adc rcx,0 + + mul r9 + mov r10,r12 + lea r12,QWORD PTR[r12*2+rbx] + add r15,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r15,rcx + mov rcx,rdx + adc rcx,0 + + mul r9 + shr r10,63 + add r8,rax + mov rax,r9 + adc rdx,0 + add r8,rcx + mov r9,rdx + adc r9,0 + + mov rcx,r13 + lea r13,QWORD PTR[r13*2+r10] + + mul rax + add r11,rax + adc r12,rdx + adc r13,0 + + mov QWORD PTR[32+rsp],r11 + mov QWORD PTR[40+rsp],r12 + shr rcx,63 + + + mov r10,QWORD PTR[24+rsi] + mov rax,QWORD PTR[32+rsi] + mul r10 + add r14,rax + mov rax,QWORD PTR[40+rsi] + mov rbx,rdx + adc rbx,0 + + mul r10 + add r15,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r15,rbx + mov rbx,rdx + adc rbx,0 + + mul r10 + mov r12,r14 + lea 
r14,QWORD PTR[r14*2+rcx] + add r8,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r8,rbx + mov rbx,rdx + adc rbx,0 + + mul r10 + shr r12,63 + add r9,rax + mov rax,r10 + adc rdx,0 + add r9,rbx + mov r10,rdx + adc r10,0 + + mov rbx,r15 + lea r15,QWORD PTR[r15*2+r12] + + mul rax + add r13,rax + adc r14,rdx + adc r15,0 + + mov QWORD PTR[48+rsp],r13 + mov QWORD PTR[56+rsp],r14 + shr rbx,63 + + + mov r11,QWORD PTR[32+rsi] + mov rax,QWORD PTR[40+rsi] + mul r11 + add r8,rax + mov rax,QWORD PTR[48+rsi] + mov rcx,rdx + adc rcx,0 + + mul r11 + add r9,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + mov r12,r8 + lea r8,QWORD PTR[r8*2+rbx] + add r9,rcx + mov rcx,rdx + adc rcx,0 + + mul r11 + shr r12,63 + add r10,rax + mov rax,r11 + adc rdx,0 + add r10,rcx + mov r11,rdx + adc r11,0 + + mov rcx,r9 + lea r9,QWORD PTR[r9*2+r12] + + mul rax + add r15,rax + adc r8,rdx + adc r9,0 + + mov QWORD PTR[64+rsp],r15 + mov QWORD PTR[72+rsp],r8 + shr rcx,63 + + + mov r12,QWORD PTR[40+rsi] + mov rax,QWORD PTR[48+rsi] + mul r12 + add r10,rax + mov rax,QWORD PTR[56+rsi] + mov rbx,rdx + adc rbx,0 + + mul r12 + add r11,rax + mov rax,r12 + mov r15,r10 + lea r10,QWORD PTR[r10*2+rcx] + adc rdx,0 + shr r15,63 + add r11,rbx + mov r12,rdx + adc r12,0 + + mov rbx,r11 + lea r11,QWORD PTR[r11*2+r15] + + mul rax + add r9,rax + adc r10,rdx + adc r11,0 + + mov QWORD PTR[80+rsp],r9 + mov QWORD PTR[88+rsp],r10 + + + mov r13,QWORD PTR[48+rsi] + mov rax,QWORD PTR[56+rsi] + mul r13 + add r12,rax + mov rax,r13 + mov r13,rdx + adc r13,0 + + xor r14,r14 + shl rbx,1 + adc r12,r12 + adc r13,r13 + adc r14,r14 + + mul rax + add r11,rax + adc r12,rdx + adc r13,0 + + mov QWORD PTR[96+rsp],r11 + mov QWORD PTR[104+rsp],r12 + + + mov rax,QWORD PTR[56+rsi] + mul rax + add r13,rax + adc rdx,0 + + add r14,rdx + + mov QWORD PTR[112+rsp],r13 + mov QWORD PTR[120+rsp],r14 + + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + 
mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reduce + + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] + sbb rcx,rcx + + call __rsaz_512_subtract + + mov rdx,r8 + mov rax,r9 + mov r8d,DWORD PTR[((128+8))+rsp] + mov rsi,rdi + + dec r8d + jnz $L$oop_sqr + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$sqr_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_sqr:: +rsaz_512_sqr ENDP +PUBLIC rsaz_512_mul + +ALIGN 32 +rsaz_512_mul PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + sub rsp,128+24 +$L$mul_body:: +DB 102,72,15,110,199 +DB 102,72,15,110,201 + mov QWORD PTR[128+rsp],r8 + mov rbx,QWORD PTR[rdx] + mov rbp,rdx + call __rsaz_512_mul + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reduce + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] + sbb rcx,rcx + + call __rsaz_512_subtract + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD 
PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$mul_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul:: +rsaz_512_mul ENDP +PUBLIC rsaz_512_mul_gather4 + +ALIGN 32 +rsaz_512_mul_gather4 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul_gather4:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + sub rsp,128+24 +$L$mul_gather4_body:: + mov eax,DWORD PTR[64+r9*4+rdx] +DB 102,72,15,110,199 + mov ebx,DWORD PTR[r9*4+rdx] +DB 102,72,15,110,201 + mov QWORD PTR[128+rsp],r8 + + shl rax,32 + or rbx,rax + mov rax,QWORD PTR[rsi] + mov rcx,QWORD PTR[8+rsi] + lea rbp,QWORD PTR[128+r9*4+rdx] + mul rbx + mov QWORD PTR[rsp],rax + mov rax,rcx + mov r8,rdx + + mul rbx + movd xmm4,DWORD PTR[rbp] + add r8,rax + mov rax,QWORD PTR[16+rsi] + mov r9,rdx + adc r9,0 + + mul rbx + movd xmm5,DWORD PTR[64+rbp] + add r9,rax + mov rax,QWORD PTR[24+rsi] + mov r10,rdx + adc r10,0 + + mul rbx + pslldq xmm5,4 + add r10,rax + mov rax,QWORD PTR[32+rsi] + mov r11,rdx + adc r11,0 + + mul rbx + por xmm4,xmm5 + add r11,rax + mov rax,QWORD PTR[40+rsi] + mov r12,rdx + adc r12,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[48+rsi] + mov r13,rdx + adc r13,0 + + mul rbx + lea rbp,QWORD PTR[128+rbp] + add r13,rax + mov rax,QWORD PTR[56+rsi] + mov r14,rdx + adc r14,0 + + mul rbx +DB 102,72,15,126,227 + add r14,rax + mov rax,QWORD PTR[rsi] + mov r15,rdx + adc r15,0 + + lea rdi,QWORD PTR[8+rsp] + mov ecx,7 + jmp $L$oop_mul_gather + +ALIGN 32 +$L$oop_mul_gather:: + mul rbx + add r8,rax + mov rax,QWORD PTR[8+rsi] + mov QWORD PTR[rdi],r8 + mov r8,rdx + adc r8,0 + + 
mul rbx + movd xmm4,DWORD PTR[rbp] + add r9,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + add r8,r9 + mov r9,rdx + adc r9,0 + + mul rbx + movd xmm5,DWORD PTR[64+rbp] + add r10,rax + mov rax,QWORD PTR[24+rsi] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + pslldq xmm5,4 + add r11,rax + mov rax,QWORD PTR[32+rsi] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + por xmm4,xmm5 + add r12,rax + mov rax,QWORD PTR[40+rsi] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx +DB 102,72,15,126,227 + add r15,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + lea rbp,QWORD PTR[128+rbp] + lea rdi,QWORD PTR[8+rdi] + + dec ecx + jnz $L$oop_mul_gather + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reduce + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] + sbb rcx,rcx + + call __rsaz_512_subtract + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] 
+$L$mul_gather4_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul_gather4:: +rsaz_512_mul_gather4 ENDP +PUBLIC rsaz_512_mul_scatter4 + +ALIGN 32 +rsaz_512_mul_scatter4 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul_scatter4:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + sub rsp,128+24 +$L$mul_scatter4_body:: + lea r8,QWORD PTR[r9*4+r8] +DB 102,72,15,110,199 +DB 102,72,15,110,202 +DB 102,73,15,110,208 + mov QWORD PTR[128+rsp],rcx + + mov rbp,rdi + mov rbx,QWORD PTR[rdi] + call __rsaz_512_mul + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reduce + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] +DB 102,72,15,126,214 + sbb rcx,rcx + + call __rsaz_512_subtract + + mov DWORD PTR[rsi],r8d + shr r8,32 + mov DWORD PTR[128+rsi],r9d + shr r9,32 + mov DWORD PTR[256+rsi],r10d + shr r10,32 + mov DWORD PTR[384+rsi],r11d + shr r11,32 + mov DWORD PTR[512+rsi],r12d + shr r12,32 + mov DWORD PTR[640+rsi],r13d + shr r13,32 + mov DWORD PTR[768+rsi],r14d + shr r14,32 + mov DWORD PTR[896+rsi],r15d + shr r15,32 + mov DWORD PTR[64+rsi],r8d + mov DWORD PTR[192+rsi],r9d + mov DWORD PTR[320+rsi],r10d + mov DWORD PTR[448+rsi],r11d + mov DWORD PTR[576+rsi],r12d + mov DWORD PTR[704+rsi],r13d + mov DWORD PTR[832+rsi],r14d + mov DWORD PTR[960+rsi],r15d + + lea rax,QWORD 
PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$mul_scatter4_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul_scatter4:: +rsaz_512_mul_scatter4 ENDP +PUBLIC rsaz_512_mul_by_one + +ALIGN 32 +rsaz_512_mul_by_one PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul_by_one:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + sub rsp,128+24 +$L$mul_by_one_body:: + mov rbp,rdx + mov QWORD PTR[128+rsp],rcx + + mov r8,QWORD PTR[rsi] + pxor xmm0,xmm0 + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + mov r14,QWORD PTR[48+rsi] + mov r15,QWORD PTR[56+rsi] + + movdqa XMMWORD PTR[rsp],xmm0 + movdqa XMMWORD PTR[16+rsp],xmm0 + movdqa XMMWORD PTR[32+rsp],xmm0 + movdqa XMMWORD PTR[48+rsp],xmm0 + movdqa XMMWORD PTR[64+rsp],xmm0 + movdqa XMMWORD PTR[80+rsp],xmm0 + movdqa XMMWORD PTR[96+rsp],xmm0 + call __rsaz_512_reduce + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$mul_by_one_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul_by_one:: +rsaz_512_mul_by_one ENDP + 
+ALIGN 32 +__rsaz_512_reduce PROC PRIVATE + mov rbx,r8 + imul rbx,QWORD PTR[((128+8))+rsp] + mov rax,QWORD PTR[rbp] + mov ecx,8 + jmp $L$reduction_loop + +ALIGN 32 +$L$reduction_loop:: + mul rbx + mov rax,QWORD PTR[8+rbp] + neg r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[16+rbp] + adc rdx,0 + add r8,r9 + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[24+rbp] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[32+rbp] + adc rdx,0 + add r10,r11 + mov rsi,QWORD PTR[((128+8))+rsp] + + + adc rdx,0 + mov r11,rdx + + mul rbx + add r12,rax + mov rax,QWORD PTR[40+rbp] + adc rdx,0 + imul rsi,r8 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[48+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[56+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,rsi + add r15,rax + mov rax,QWORD PTR[rbp] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + dec ecx + jne $L$reduction_loop + + DB 0F3h,0C3h ;repret +__rsaz_512_reduce ENDP + +ALIGN 32 +__rsaz_512_subtract PROC PRIVATE + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + mov r8,QWORD PTR[rbp] + mov r9,QWORD PTR[8+rbp] + neg r8 + not r9 + and r8,rcx + mov r10,QWORD PTR[16+rbp] + and r9,rcx + not r10 + mov r11,QWORD PTR[24+rbp] + and r10,rcx + not r11 + mov r12,QWORD PTR[32+rbp] + and r11,rcx + not r12 + mov r13,QWORD PTR[40+rbp] + and r12,rcx + not r13 + mov r14,QWORD PTR[48+rbp] + and r13,rcx + not r14 + mov r15,QWORD PTR[56+rbp] + and r14,rcx + not r15 + and r15,rcx + + add r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD 
PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + DB 0F3h,0C3h ;repret +__rsaz_512_subtract ENDP + +ALIGN 32 +__rsaz_512_mul PROC PRIVATE + lea rdi,QWORD PTR[8+rsp] + + mov rax,QWORD PTR[rsi] + mul rbx + mov QWORD PTR[rdi],rax + mov rax,QWORD PTR[8+rsi] + mov r8,rdx + + mul rbx + add r8,rax + mov rax,QWORD PTR[16+rsi] + mov r9,rdx + adc r9,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[24+rsi] + mov r10,rdx + adc r10,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[32+rsi] + mov r11,rdx + adc r11,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[40+rsi] + mov r12,rdx + adc r12,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[48+rsi] + mov r13,rdx + adc r13,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[56+rsi] + mov r14,rdx + adc r14,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[rsi] + mov r15,rdx + adc r15,0 + + lea rbp,QWORD PTR[8+rbp] + lea rdi,QWORD PTR[8+rdi] + + mov ecx,7 + jmp $L$oop_mul + +ALIGN 32 +$L$oop_mul:: + mov rbx,QWORD PTR[rbp] + mul rbx + add r8,rax + mov rax,QWORD PTR[8+rsi] + mov QWORD PTR[rdi],r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + add r8,r9 + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[24+rsi] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[32+rsi] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[40+rsi] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r13,r14 + mov r14,rdx + lea rbp,QWORD PTR[8+rbp] + adc r14,0 + + mul rbx + add r15,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add 
r14,r15 + mov r15,rdx + adc r15,0 + + lea rdi,QWORD PTR[8+rdi] + + dec ecx + jnz $L$oop_mul + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + DB 0F3h,0C3h ;repret +__rsaz_512_mul ENDP +PUBLIC rsaz_512_scatter4 + +ALIGN 16 +rsaz_512_scatter4 PROC PUBLIC + lea rcx,QWORD PTR[r8*4+rcx] + mov r9d,8 + jmp $L$oop_scatter +ALIGN 16 +$L$oop_scatter:: + mov rax,QWORD PTR[rdx] + lea rdx,QWORD PTR[8+rdx] + mov DWORD PTR[rcx],eax + shr rax,32 + mov DWORD PTR[64+rcx],eax + lea rcx,QWORD PTR[128+rcx] + dec r9d + jnz $L$oop_scatter + DB 0F3h,0C3h ;repret +rsaz_512_scatter4 ENDP + +PUBLIC rsaz_512_gather4 + +ALIGN 16 +rsaz_512_gather4 PROC PUBLIC + lea rdx,QWORD PTR[r8*4+rdx] + mov r9d,8 + jmp $L$oop_gather +ALIGN 16 +$L$oop_gather:: + mov eax,DWORD PTR[rdx] + mov r8d,DWORD PTR[64+rdx] + lea rdx,QWORD PTR[128+rdx] + shl r8,32 + or rax,r8 + mov QWORD PTR[rcx],rax + lea rcx,QWORD PTR[8+rcx] + dec r9d + jnz $L$oop_gather + DB 0F3h,0C3h ;repret +rsaz_512_gather4 ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rax,QWORD PTR[((128+24+48))+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD 
PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_rsaz_512_sqr + DD imagerel $L$SEH_end_rsaz_512_sqr + DD imagerel $L$SEH_info_rsaz_512_sqr + + DD imagerel $L$SEH_begin_rsaz_512_mul + DD imagerel $L$SEH_end_rsaz_512_mul + DD imagerel $L$SEH_info_rsaz_512_mul + + DD imagerel $L$SEH_begin_rsaz_512_mul_gather4 + DD imagerel $L$SEH_end_rsaz_512_mul_gather4 + DD imagerel $L$SEH_info_rsaz_512_mul_gather4 + + DD imagerel $L$SEH_begin_rsaz_512_mul_scatter4 + DD imagerel $L$SEH_end_rsaz_512_mul_scatter4 + DD imagerel $L$SEH_info_rsaz_512_mul_scatter4 + + DD imagerel $L$SEH_begin_rsaz_512_mul_by_one + DD imagerel $L$SEH_end_rsaz_512_mul_by_one + DD imagerel $L$SEH_info_rsaz_512_mul_by_one + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_rsaz_512_sqr:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$sqr_body,imagerel $L$sqr_epilogue +$L$SEH_info_rsaz_512_mul:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$mul_body,imagerel $L$mul_epilogue +$L$SEH_info_rsaz_512_mul_gather4:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$mul_gather4_body,imagerel $L$mul_gather4_epilogue +$L$SEH_info_rsaz_512_mul_scatter4:: +DB 
9,0,0,0 + DD imagerel se_handler + DD imagerel $L$mul_scatter4_body,imagerel $L$mul_scatter4_epilogue +$L$SEH_info_rsaz_512_mul_by_one:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$mul_by_one_body,imagerel $L$mul_by_one_epilogue + +.xdata ENDS +END diff --git a/win-x86_64/crypto/bn/x86_64-mont.asm b/win-x86_64/crypto/bn/x86_64-mont.asm new file mode 100644 index 0000000..a409325 --- /dev/null +++ b/win-x86_64/crypto/bn/x86_64-mont.asm @@ -0,0 +1,945 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC bn_mul_mont + +ALIGN 16 +bn_mul_mont PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul_mont:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + test r9d,3 + jnz $L$mul_enter + cmp r9d,8 + jb $L$mul_enter + cmp rdx,rsi + jne $L$mul4x_enter + test r9d,7 + jz $L$sqr8x_enter + jmp $L$mul4x_enter + +ALIGN 16 +$L$mul_enter:: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + lea r10,QWORD PTR[2+r9] + mov r11,rsp + neg r10 + lea rsp,QWORD PTR[r10*8+rsp] + and rsp,-1024 + + mov QWORD PTR[8+r9*8+rsp],r11 +$L$mul_body:: + mov r12,rdx + mov r8,QWORD PTR[r8] + mov rbx,QWORD PTR[r12] + mov rax,QWORD PTR[rsi] + + xor r14,r14 + xor r15,r15 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD PTR[rcx] + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov r13,rdx + + lea r15,QWORD PTR[1+r15] + jmp $L$1st_enter + +ALIGN 16 +$L$1st:: + add r13,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add r13,r11 + mov r11,r10 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$1st_enter:: + mul rbx + add r11,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + lea r15,QWORD PTR[1+r15] + mov r10,rdx + + mul rbp + cmp r15,r9 + jne $L$1st + + add r13,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r13,r11 + adc rdx,0 + 
mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + mov r11,r10 + + xor rdx,rdx + add r13,r11 + adc rdx,0 + mov QWORD PTR[((-8))+r9*8+rsp],r13 + mov QWORD PTR[r9*8+rsp],rdx + + lea r14,QWORD PTR[1+r14] + jmp $L$outer +ALIGN 16 +$L$outer:: + mov rbx,QWORD PTR[r14*8+r12] + xor r15,r15 + mov rbp,r8 + mov r10,QWORD PTR[rsp] + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov r10,QWORD PTR[8+rsp] + mov r13,rdx + + lea r15,QWORD PTR[1+r15] + jmp $L$inner_enter + +ALIGN 16 +$L$inner:: + add r13,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$inner_enter:: + mul rbx + add r11,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + lea r15,QWORD PTR[1+r15] + + mul rbp + cmp r15,r9 + jne $L$inner + + add r13,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + + xor rdx,rdx + add r13,r11 + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r9*8+rsp],r13 + mov QWORD PTR[r9*8+rsp],rdx + + lea r14,QWORD PTR[1+r14] + cmp r14,r9 + jb $L$outer + + xor r14,r14 + mov rax,QWORD PTR[rsp] + lea rsi,QWORD PTR[rsp] + mov r15,r9 + jmp $L$sub +ALIGN 16 +$L$sub:: sbb rax,QWORD PTR[r14*8+rcx] + mov QWORD PTR[r14*8+rdi],rax + mov rax,QWORD PTR[8+r14*8+rsi] + lea r14,QWORD PTR[1+r14] + dec r15 + jnz $L$sub + + sbb rax,0 + xor r14,r14 + mov r15,r9 +ALIGN 16 +$L$copy:: + mov rsi,QWORD PTR[r14*8+rsp] + mov rcx,QWORD PTR[r14*8+rdi] + xor rsi,rcx + and rsi,rax + xor rsi,rcx + mov QWORD PTR[r14*8+rsp],r14 + mov QWORD PTR[r14*8+rdi],rsi + lea r14,QWORD PTR[1+r14] + sub r15,1 + jnz $L$copy + + mov rsi,QWORD PTR[8+r9*8+rsp] + mov rax,1 + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] 
+ mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$mul_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul_mont:: +bn_mul_mont ENDP + +ALIGN 16 +bn_mul4x_mont PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul4x_mont:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +$L$mul4x_enter:: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + lea r10,QWORD PTR[4+r9] + mov r11,rsp + neg r10 + lea rsp,QWORD PTR[r10*8+rsp] + and rsp,-1024 + + mov QWORD PTR[8+r9*8+rsp],r11 +$L$mul4x_body:: + mov QWORD PTR[16+r9*8+rsp],rdi + mov r12,rdx + mov r8,QWORD PTR[r8] + mov rbx,QWORD PTR[r12] + mov rax,QWORD PTR[rsi] + + xor r14,r14 + xor r15,r15 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD PTR[rcx] + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + add rdi,r11 + lea r15,QWORD PTR[4+r15] + adc rdx,0 + mov QWORD PTR[rsp],rdi + mov r13,rdx + jmp $L$1st4x +ALIGN 16 +$L$1st4x:: + mul rbx + add r10,rax + mov rax,QWORD PTR[((-16))+r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-8))+r15*8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[8+r15*8+rsi] + adc rdx,0 + add 
r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[8+r15*8+rcx] + adc rdx,0 + lea r15,QWORD PTR[4+r15] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[((-16))+r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-32))+r15*8+rsp],rdi + mov r13,rdx + cmp r15,r9 + jb $L$1st4x + + mul rbx + add r10,rax + mov rax,QWORD PTR[((-16))+r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-8))+r15*8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov r13,rdx + + xor rdi,rdi + add r13,r10 + adc rdi,0 + mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov QWORD PTR[r15*8+rsp],rdi + + lea r14,QWORD PTR[1+r14] +ALIGN 4 +$L$outer4x:: + mov rbx,QWORD PTR[r14*8+r12] + xor r15,r15 + mov r10,QWORD PTR[rsp] + mov rbp,r8 + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[8+rcx] + adc rdx,0 + add r11,QWORD PTR[8+rsp] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + add rdi,r11 + lea r15,QWORD PTR[4+r15] + adc rdx,0 + mov QWORD PTR[rsp],rdi + mov r13,rdx + jmp $L$inner4x +ALIGN 16 +$L$inner4x:: + mul rbx + add r10,rax + mov rax,QWORD PTR[((-16))+r15*8+rcx] + adc rdx,0 + add r10,QWORD PTR[((-16))+r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-8))+r15*8+rcx] + adc rdx,0 + add r11,QWORD PTR[((-8))+r15*8+rsp] + adc rdx,0 
+ mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + add r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[8+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[8+r15*8+rcx] + adc rdx,0 + add r11,QWORD PTR[8+r15*8+rsp] + adc rdx,0 + lea r15,QWORD PTR[4+r15] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[((-16))+r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-32))+r15*8+rsp],rdi + mov r13,rdx + cmp r15,r9 + jb $L$inner4x + + mul rbx + add r10,rax + mov rax,QWORD PTR[((-16))+r15*8+rcx] + adc rdx,0 + add r10,QWORD PTR[((-16))+r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-8))+r15*8+rcx] + adc rdx,0 + add r11,QWORD PTR[((-8))+r15*8+rsp] + adc rdx,0 + lea r14,QWORD PTR[1+r14] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov r13,rdx + + xor rdi,rdi + add r13,r10 + adc rdi,0 + add r13,QWORD PTR[r9*8+rsp] + adc rdi,0 + mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov QWORD PTR[r15*8+rsp],rdi + + cmp r14,r9 + jb $L$outer4x + mov rdi,QWORD PTR[16+r9*8+rsp] + mov rax,QWORD PTR[rsp] + mov rdx,QWORD PTR[8+rsp] + shr r9,2 + lea rsi,QWORD PTR[rsp] + xor r14,r14 + + sub rax,QWORD PTR[rcx] + mov rbx,QWORD PTR[16+rsi] + mov rbp,QWORD PTR[24+rsi] + sbb rdx,QWORD PTR[8+rcx] + lea r15,QWORD PTR[((-1))+r9] + jmp $L$sub4x +ALIGN 16 +$L$sub4x:: + mov QWORD PTR[r14*8+rdi],rax + mov QWORD PTR[8+r14*8+rdi],rdx + sbb rbx,QWORD PTR[16+r14*8+rcx] + mov rax,QWORD 
PTR[32+r14*8+rsi] + mov rdx,QWORD PTR[40+r14*8+rsi] + sbb rbp,QWORD PTR[24+r14*8+rcx] + mov QWORD PTR[16+r14*8+rdi],rbx + mov QWORD PTR[24+r14*8+rdi],rbp + sbb rax,QWORD PTR[32+r14*8+rcx] + mov rbx,QWORD PTR[48+r14*8+rsi] + mov rbp,QWORD PTR[56+r14*8+rsi] + sbb rdx,QWORD PTR[40+r14*8+rcx] + lea r14,QWORD PTR[4+r14] + dec r15 + jnz $L$sub4x + + mov QWORD PTR[r14*8+rdi],rax + mov rax,QWORD PTR[32+r14*8+rsi] + sbb rbx,QWORD PTR[16+r14*8+rcx] + mov QWORD PTR[8+r14*8+rdi],rdx + sbb rbp,QWORD PTR[24+r14*8+rcx] + mov QWORD PTR[16+r14*8+rdi],rbx + + sbb rax,0 +DB 66h, 48h, 0fh, 6eh, 0c0h + punpcklqdq xmm0,xmm0 + mov QWORD PTR[24+r14*8+rdi],rbp + xor r14,r14 + + mov r15,r9 + pxor xmm5,xmm5 + jmp $L$copy4x +ALIGN 16 +$L$copy4x:: + movdqu xmm2,XMMWORD PTR[r14*1+rsp] + movdqu xmm4,XMMWORD PTR[16+r14*1+rsp] + movdqu xmm1,XMMWORD PTR[r14*1+rdi] + movdqu xmm3,XMMWORD PTR[16+r14*1+rdi] + pxor xmm2,xmm1 + pxor xmm4,xmm3 + pand xmm2,xmm0 + pand xmm4,xmm0 + pxor xmm2,xmm1 + pxor xmm4,xmm3 + movdqu XMMWORD PTR[r14*1+rdi],xmm2 + movdqu XMMWORD PTR[16+r14*1+rdi],xmm4 + movdqa XMMWORD PTR[r14*1+rsp],xmm5 + movdqa XMMWORD PTR[16+r14*1+rsp],xmm5 + + lea r14,QWORD PTR[32+r14] + dec r15 + jnz $L$copy4x + + shl r9,2 + mov rsi,QWORD PTR[8+r9*8+rsp] + mov rax,1 + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$mul4x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul4x_mont:: +bn_mul4x_mont ENDP +EXTERN bn_sqr8x_internal:NEAR + + +ALIGN 32 +bn_sqr8x_mont PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_sqr8x_mont:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +$L$sqr8x_enter:: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + 
push r15 + + mov r10d,r9d + shl r9d,3 + shl r10,3+2 + neg r9 + + + + + + + lea r11,QWORD PTR[((-64))+r9*4+rsp] + mov r8,QWORD PTR[r8] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb $L$sqr8x_sp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*4+rsp] + jmp $L$sqr8x_sp_done + +ALIGN 32 +$L$sqr8x_sp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*4] + lea rsp,QWORD PTR[((-64))+r9*4+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$sqr8x_sp_done:: + and rsp,-64 + mov r10,r9 + neg r9 + + lea r11,QWORD PTR[64+r9*2+rsp] + mov QWORD PTR[32+rsp],r8 + mov QWORD PTR[40+rsp],rax +$L$sqr8x_body:: + + mov rbp,r9 +DB 102,73,15,110,211 + shr rbp,3+2 + mov eax,DWORD PTR[((OPENSSL_ia32cap_P+8))] + jmp $L$sqr8x_copy_n + +ALIGN 32 +$L$sqr8x_copy_n:: + movq xmm0,QWORD PTR[rcx] + movq xmm1,QWORD PTR[8+rcx] + movq xmm3,QWORD PTR[16+rcx] + movq xmm4,QWORD PTR[24+rcx] + lea rcx,QWORD PTR[32+rcx] + movdqa XMMWORD PTR[r11],xmm0 + movdqa XMMWORD PTR[16+r11],xmm1 + movdqa XMMWORD PTR[32+r11],xmm3 + movdqa XMMWORD PTR[48+r11],xmm4 + lea r11,QWORD PTR[64+r11] + dec rbp + jnz $L$sqr8x_copy_n + + pxor xmm0,xmm0 +DB 102,72,15,110,207 +DB 102,73,15,110,218 + call bn_sqr8x_internal + + pxor xmm0,xmm0 + lea rax,QWORD PTR[48+rsp] + lea rdx,QWORD PTR[64+r9*2+rsp] + shr r9,3+2 + mov rsi,QWORD PTR[40+rsp] + jmp $L$sqr8x_zero + +ALIGN 32 +$L$sqr8x_zero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + movdqa XMMWORD PTR[32+rax],xmm0 + movdqa XMMWORD PTR[48+rax],xmm0 + lea rax,QWORD PTR[64+rax] + movdqa XMMWORD PTR[rdx],xmm0 + movdqa XMMWORD PTR[16+rdx],xmm0 + movdqa XMMWORD PTR[32+rdx],xmm0 + movdqa XMMWORD PTR[48+rdx],xmm0 + lea rdx,QWORD PTR[64+rdx] + dec r9 + jnz $L$sqr8x_zero + + mov rax,1 + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$sqr8x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov 
rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_sqr8x_mont:: +bn_sqr8x_mont ENDP +DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83 +DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +DB 115,108,46,111,114,103,62,0 +ALIGN 16 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +mul_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + mov r10,QWORD PTR[192+r8] + mov rax,QWORD PTR[8+r10*8+rax] + lea rax,QWORD PTR[48+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + + jmp $L$common_seh_tail +mul_handler ENDP + + +ALIGN 16 +sqr_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + mov rax,QWORD PTR[40+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov 
r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +sqr_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_bn_mul_mont + DD imagerel $L$SEH_end_bn_mul_mont + DD imagerel $L$SEH_info_bn_mul_mont + + DD imagerel $L$SEH_begin_bn_mul4x_mont + DD imagerel $L$SEH_end_bn_mul4x_mont + DD imagerel $L$SEH_info_bn_mul4x_mont + + DD imagerel $L$SEH_begin_bn_sqr8x_mont + DD imagerel $L$SEH_end_bn_sqr8x_mont + DD imagerel $L$SEH_info_bn_sqr8x_mont +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_bn_mul_mont:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$mul_body,imagerel $L$mul_epilogue +$L$SEH_info_bn_mul4x_mont:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue +$L$SEH_info_bn_sqr8x_mont:: +DB 9,0,0,0 + DD imagerel sqr_handler + DD imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue + +.xdata ENDS +END diff --git a/win-x86_64/crypto/bn/x86_64-mont5.asm b/win-x86_64/crypto/bn/x86_64-mont5.asm new file mode 100644 index 
0000000..90c6100 --- /dev/null +++ b/win-x86_64/crypto/bn/x86_64-mont5.asm @@ -0,0 +1,2061 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC bn_mul_mont_gather5 + +ALIGN 64 +bn_mul_mont_gather5 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul_mont_gather5:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + test r9d,7 + jnz $L$mul_enter + jmp $L$mul4x_enter + +ALIGN 16 +$L$mul_enter:: + mov r9d,r9d + mov rax,rsp + mov r10d,DWORD PTR[56+rsp] + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-40))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + lea r11,QWORD PTR[2+r9] + neg r11 + lea rsp,QWORD PTR[r11*8+rsp] + and rsp,-1024 + + mov QWORD PTR[8+r9*8+rsp],rax +$L$mul_body:: + mov r12,rdx + mov r11,r10 + shr r10,3 + and r11,7 + not r10 + lea rax,QWORD PTR[$L$magic_masks] + and r10,3 + lea r12,QWORD PTR[96+r11*8+r12] + movq xmm4,QWORD PTR[r10*8+rax] + movq xmm5,QWORD PTR[8+r10*8+rax] + movq xmm6,QWORD PTR[16+r10*8+rax] + movq xmm7,QWORD PTR[24+r10*8+rax] + + movq xmm0,QWORD PTR[(((-96)))+r12] + movq xmm1,QWORD PTR[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+r12] + pand xmm1,xmm5 + movq xmm3,QWORD PTR[96+r12] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + por xmm0,xmm2 + lea r12,QWORD PTR[256+r12] + por xmm0,xmm3 + +DB 102,72,15,126,195 + + mov r8,QWORD PTR[r8] + mov rax,QWORD PTR[rsi] + + xor r14,r14 + xor r15,r15 + + movq xmm0,QWORD PTR[(((-96)))+r12] + movq xmm1,QWORD PTR[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+r12] + pand xmm1,xmm5 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD PTR[rcx] + + movq xmm3,QWORD PTR[96+r12] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + + imul rbp,r10 + mov r11,rdx + + por xmm0,xmm2 + lea r12,QWORD PTR[256+r12] + por xmm0,xmm3 + + mul rbp + add r10,rax + mov 
rax,QWORD PTR[8+rsi] + adc rdx,0 + mov r13,rdx + + lea r15,QWORD PTR[1+r15] + jmp $L$1st_enter + +ALIGN 16 +$L$1st:: + add r13,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add r13,r11 + mov r11,r10 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$1st_enter:: + mul rbx + add r11,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + lea r15,QWORD PTR[1+r15] + mov r10,rdx + + mul rbp + cmp r15,r9 + jne $L$1st + +DB 102,72,15,126,195 + + add r13,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r13,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + mov r11,r10 + + xor rdx,rdx + add r13,r11 + adc rdx,0 + mov QWORD PTR[((-8))+r9*8+rsp],r13 + mov QWORD PTR[r9*8+rsp],rdx + + lea r14,QWORD PTR[1+r14] + jmp $L$outer +ALIGN 16 +$L$outer:: + xor r15,r15 + mov rbp,r8 + mov r10,QWORD PTR[rsp] + + movq xmm0,QWORD PTR[(((-96)))+r12] + movq xmm1,QWORD PTR[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+r12] + pand xmm1,xmm5 + + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + + movq xmm3,QWORD PTR[96+r12] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + + imul rbp,r10 + mov r11,rdx + + por xmm0,xmm2 + lea r12,QWORD PTR[256+r12] + por xmm0,xmm3 + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov r10,QWORD PTR[8+rsp] + mov r13,rdx + + lea r15,QWORD PTR[1+r15] + jmp $L$inner_enter + +ALIGN 16 +$L$inner:: + add r13,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$inner_enter:: + mul rbx + add r11,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + lea r15,QWORD PTR[1+r15] + + mul rbp + cmp r15,r9 + jne $L$inner + +DB 102,72,15,126,195 + + add r13,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + + xor rdx,rdx + add r13,r11 + adc rdx,0 + add r13,r10 + adc 
rdx,0 + mov QWORD PTR[((-8))+r9*8+rsp],r13 + mov QWORD PTR[r9*8+rsp],rdx + + lea r14,QWORD PTR[1+r14] + cmp r14,r9 + jb $L$outer + + xor r14,r14 ;index = 0; xor also clears CF ahead of the first sbb + mov rax,QWORD PTR[rsp] + lea rsi,QWORD PTR[rsp] + mov r15,r9 + jmp $L$sub +ALIGN 16 +$L$sub:: sbb rax,QWORD PTR[r14*8+rcx] ;[rdi] = [rsp] - [rcx], one qword per pass, borrow carried in CF + mov QWORD PTR[r14*8+rdi],rax + mov rax,QWORD PTR[8+r14*8+rsi] + lea r14,QWORD PTR[1+r14] + dec r15 ;dec does not modify CF + jnz $L$sub + + sbb rax,0 ;rax = top word [r9*8+rsp] minus the final borrow; used as the select mask below + xor r14,r14 + mov r15,r9 +ALIGN 16 +$L$copy:: ;per qword: [rdi] = (([rsp] XOR [rdi]) AND rax) XOR [rdi]; the [rsp] slot is then overwritten with the index + mov rsi,QWORD PTR[r14*8+rsp] + mov rcx,QWORD PTR[r14*8+rdi] + xor rsi,rcx + and rsi,rax + xor rsi,rcx + mov QWORD PTR[r14*8+rsp],r14 + mov QWORD PTR[r14*8+rdi],rsi + lea r14,QWORD PTR[1+r14] + sub r15,1 + jnz $L$copy + + mov rsi,QWORD PTR[8+r9*8+rsp] ;pointer stored before this excerpt; the saved registers are reloaded relative to it + mov rax,1 + movaps xmm6,XMMWORD PTR[((-88))+rsi] + movaps xmm7,XMMWORD PTR[((-72))+rsi] + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$mul_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul_mont_gather5:: +bn_mul_mont_gather5 ENDP + +ALIGN 32 +bn_mul4x_mont_gather5 PROC PRIVATE ;Win64 wrapper around mul4x_internal: remaps the arguments, saves rbx/rbp/r12-r15 and xmm6/xmm7, builds a 64-byte-aligned frame, returns rax=1 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul4x_mont_gather5:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] ;5th argument (first stack slot above the 32-byte shadow space) + mov r9,QWORD PTR[48+rsp] ;6th argument + + +$L$mul4x_enter:: +DB 067h + mov rax,rsp ;rax = entry rsp; every save slot below is addressed relative to it + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-40))+rsp] + movaps XMMWORD PTR[rsp],xmm6 ;xmm6 kept at entry rsp-88 + movaps XMMWORD PTR[16+rsp],xmm7 ;xmm7 kept at entry rsp-72 +DB 067h + mov r10d,r9d + shl r9d,3 ;r9 = 8*r9 + shl r10d,3+2 ;r10 = 32*(original r9d) + neg r9 + + + + + + + + + lea r11,QWORD PTR[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 ;r11 = (candidate frame address - rsi) mod 4096 + cmp r10,r11 + jb $L$mul4xsp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + jmp $L$mul4xsp_done + +ALIGN 32 +$L$mul4xsp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*2] + lea
rsp,QWORD PTR[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 ;mov, not xor: the carry from the sub above must survive for cmovc + cmovc r11,r10 + sub rsp,r11 +$L$mul4xsp_done:: + and rsp,-64 ;64-byte align the frame + neg r9 + + mov QWORD PTR[40+rsp],rax ;keep the entry rsp at [40+rsp] for the epilogue +$L$mul4x_body:: + + call mul4x_internal + + mov rsi,QWORD PTR[40+rsp] ;rsi = entry rsp + mov rax,1 + movaps xmm6,XMMWORD PTR[((-88))+rsi] ;restore callee-saved xmm6/xmm7 (mul4x_internal overwrites them) + movaps xmm7,XMMWORD PTR[((-72))+rsi] + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$mul4x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul4x_mont_gather5:: +bn_mul4x_mont_gather5 ENDP + + +ALIGN 32 +mul4x_internal PROC PRIVATE ;internal helper, not a Win64 entry point: expects rax = the caller's entry rsp + shl r9,5 + mov r10d,DWORD PTR[56+rax] ;dword at entry rsp+56, i.e. the wrapper's 7th argument slot + lea r13,QWORD PTR[256+r9*1+rdx] + shr r9,5 + mov r11,r10 + shr r10,3 + and r11,7 + not r10 + lea rax,QWORD PTR[$L$magic_masks] + and r10,3 + lea r12,QWORD PTR[96+r11*8+rdx] + movq xmm4,QWORD PTR[r10*8+rax] ;xmm4-xmm7 = four consecutive qwords of $L$magic_masks starting at index r10 + movq xmm5,QWORD PTR[8+r10*8+rax] + add r11,7 + movq xmm6,QWORD PTR[16+r10*8+rax] ;xmm6/xmm7 are callee-saved on Win64; the calling wrapper has to save and restore them + movq xmm7,QWORD PTR[24+r10*8+rax] + and r11,7 + + movq xmm0,QWORD PTR[(((-96)))+r12] + lea r14,QWORD PTR[256+r12] + movq xmm1,QWORD PTR[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+r12] + pand xmm1,xmm5 + movq xmm3,QWORD PTR[96+r12] + pand xmm2,xmm6 +DB 067h + por xmm0,xmm1 + movq xmm1,QWORD PTR[((-96))+r14] +DB 067h + pand xmm3,xmm7 +DB 067h + por xmm0,xmm2 + movq xmm2,QWORD PTR[((-32))+r14] +DB 067h + pand xmm1,xmm4 +DB 067h + por xmm0,xmm3 + movq xmm3,QWORD PTR[32+r14] + +DB 102,72,15,126,195 ;movq rbx,xmm0 + movq xmm0,QWORD PTR[96+r14] + mov QWORD PTR[((16+8))+rsp],r13 + mov QWORD PTR[((56+8))+rsp],rdi + + mov r8,QWORD PTR[r8] + mov rax,QWORD PTR[rsi] + lea rsi,QWORD PTR[r9*1+rsi] + neg r9 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD PTR[rcx] + + pand xmm2,xmm5 + pand xmm3,xmm6 + por xmm1,xmm2 + + imul rbp,r10 + + + + + + + + lea r14,QWORD PTR[((64+8))+r11*8+rsp] + mov r11,rdx +
+ pand xmm0,xmm7 + por xmm1,xmm3 + lea r12,QWORD PTR[512+r12] + por xmm0,xmm1 + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+r9*1+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[16+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+r9*1+rsi] + adc rdx,0 + add rdi,r11 + lea r15,QWORD PTR[32+r9] + lea rcx,QWORD PTR[64+rcx] + adc rdx,0 + mov QWORD PTR[r14],rdi + mov r13,rdx + jmp $L$1st4x + +ALIGN 32 +$L$1st4x:: + mul rbx + add r10,rax + mov rax,QWORD PTR[((-32))+rcx] + lea r14,QWORD PTR[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r14],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-16))+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r15*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r14],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[8+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r14],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[16+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+r15*1+rsi] + adc rdx,0 + add rdi,r11 + lea rcx,QWORD PTR[64+rcx] + adc rdx,0 + mov QWORD PTR[r14],rdi + mov r13,rdx + + add r15,32 + jnz $L$1st4x + + mul rbx + add r10,rax + mov rax,QWORD PTR[((-32))+rcx] + lea r14,QWORD PTR[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r14],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-16))+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r9*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r14],rdi + mov r13,rdx + +DB 102,72,15,126,195 + lea rcx,QWORD PTR[r9*2+rcx] + + xor 
rdi,rdi + add r13,r10 + adc rdi,0 + mov QWORD PTR[((-8))+r14],r13 + + jmp $L$outer4x + +ALIGN 32 +$L$outer4x:: + mov r10,QWORD PTR[r9*1+r14] + mov rbp,r8 + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + + movq xmm0,QWORD PTR[(((-96)))+r12] + movq xmm1,QWORD PTR[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+r12] + pand xmm1,xmm5 + movq xmm3,QWORD PTR[96+r12] + + imul rbp,r10 +DB 067h + mov r11,rdx + mov QWORD PTR[r14],rdi + + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + por xmm0,xmm2 + lea r14,QWORD PTR[r9*1+r14] + lea r12,QWORD PTR[256+r12] + por xmm0,xmm3 + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+r9*1+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[16+rcx] + adc rdx,0 + add r11,QWORD PTR[8+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+r9*1+rsi] + adc rdx,0 + add rdi,r11 + lea r15,QWORD PTR[32+r9] + lea rcx,QWORD PTR[64+rcx] + adc rdx,0 + mov r13,rdx + jmp $L$inner4x + +ALIGN 32 +$L$inner4x:: + mul rbx + add r10,rax + mov rax,QWORD PTR[((-32))+rcx] + adc rdx,0 + add r10,QWORD PTR[16+r14] + lea r14,QWORD PTR[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-32))+r14],rdi + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-16))+rcx] + adc rdx,0 + add r11,QWORD PTR[((-8))+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r15*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-24))+r14],r13 + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + add r10,QWORD PTR[r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[8+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-16))+r14],rdi + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[16+rcx] + adc rdx,0 + add r11,QWORD PTR[8+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD 
PTR[16+r15*1+rsi] + adc rdx,0 + add rdi,r11 + lea rcx,QWORD PTR[64+rcx] + adc rdx,0 + mov QWORD PTR[((-8))+r14],r13 + mov r13,rdx + + add r15,32 + jnz $L$inner4x + + mul rbx + add r10,rax + mov rax,QWORD PTR[((-32))+rcx] + adc rdx,0 + add r10,QWORD PTR[16+r14] + lea r14,QWORD PTR[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-32))+r14],rdi + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,rbp + mov rbp,QWORD PTR[((-16))+rcx] + adc rdx,0 + add r11,QWORD PTR[((-8))+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r9*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-24))+r14],r13 + mov r13,rdx + +DB 102,72,15,126,195 + mov QWORD PTR[((-16))+r14],rdi + lea rcx,QWORD PTR[r9*2+rcx] + + xor rdi,rdi + add r13,r10 + adc rdi,0 + add r13,QWORD PTR[r14] + adc rdi,0 + mov QWORD PTR[((-8))+r14],r13 + + cmp r12,QWORD PTR[((16+8))+rsp] + jb $L$outer4x + sub rbp,r13 + adc r15,r15 + or rdi,r15 + xor rdi,1 + lea rbx,QWORD PTR[r9*1+r14] + lea rbp,QWORD PTR[rdi*8+rcx] + mov rcx,r9 + sar rcx,3+2 + mov rdi,QWORD PTR[((56+8))+rsp] + jmp $L$sqr4x_sub +mul4x_internal ENDP +PUBLIC bn_power5 + +ALIGN 32 +bn_power5 PROC PUBLIC ;Win64 entry: remaps rcx,rdx,r8,r9 and stack args 5-6 into rdi,rsi,rdx,rcx,r8,r9; saves rbx,rbp,r12-r15,xmm6,xmm7; runs __bn_sqr8x_internal five times, then mul4x_internal; returns rax=1 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_power5:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] ;5th argument (first stack slot above the 32-byte shadow space) + mov r9,QWORD PTR[48+rsp] ;6th argument + + + mov rax,rsp ;rax = entry rsp; every save slot below is addressed relative to it + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-40))+rsp] + movaps XMMWORD PTR[rsp],xmm6 ;xmm6 kept at entry rsp-88 + movaps XMMWORD PTR[16+rsp],xmm7 ;xmm7 kept at entry rsp-72 + mov r10d,r9d + shl r9d,3 ;r9 = 8*r9 + shl r10d,3+2 ;r10 = 32*(original r9d) + neg r9 + mov r8,QWORD PTR[r8] ;r8 = qword behind the 5th argument pointer + + + + + + + + lea r11,QWORD PTR[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 ;r11 = (candidate frame address - rsi) mod 4096 + cmp r10,r11 + jb $L$pwr_sp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + jmp $L$pwr_sp_done + +ALIGN 32 +$L$pwr_sp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*2] + lea
rsp,QWORD PTR[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 ;mov, not xor: the carry from the sub above must survive for cmovc + cmovc r11,r10 + sub rsp,r11 +$L$pwr_sp_done:: + and rsp,-64 ;64-byte align the frame + mov r10,r9 + neg r9 + + + + + + + + + + + mov QWORD PTR[32+rsp],r8 ;[32+rsp] = the qword loaded through the 5th argument + mov QWORD PTR[40+rsp],rax ;[40+rsp] = entry rsp, used by the epilogue +$L$power5_body:: +DB 102,72,15,110,207 ;movq xmm1,rdi +DB 102,72,15,110,209 ;movq xmm2,rcx +DB 102,73,15,110,218 ;movq xmm3,r10 +DB 102,72,15,110,226 ;movq xmm4,rdx + + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + +DB 102,72,15,126,209 ;movq rcx,xmm2 +DB 102,72,15,126,226 ;movq rdx,xmm4 + mov rdi,rsi + mov rax,QWORD PTR[40+rsp] ;mul4x_internal expects rax = entry rsp + lea r8,QWORD PTR[32+rsp] ;mul4x_internal dereferences r8, so pass the address of the saved qword + + call mul4x_internal + + mov rsi,QWORD PTR[40+rsp] ;rsi = entry rsp + mov rax,1 + movaps xmm6,XMMWORD PTR[((-88))+rsi] ;restore xmm6/xmm7 saved in the prologue: callee-saved on Win64 and overwritten by mul4x_internal + movaps xmm7,XMMWORD PTR[((-72))+rsi] + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$power5_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_power5:: +bn_power5 ENDP + +PUBLIC bn_sqr8x_internal + + +ALIGN 32 +bn_sqr8x_internal PROC PUBLIC +__bn_sqr8x_internal:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lea rbp,QWORD PTR[32+r10] + lea rsi,QWORD PTR[r9*1+rsi] + + mov rcx,r9 + + + mov r14,QWORD PTR[((-32))+rbp*1+rsi] + lea rdi,QWORD PTR[((48+8))+r9*2+rsp] + mov rax,QWORD PTR[((-24))+rbp*1+rsi] + lea rdi,QWORD PTR[((-32))+rbp*1+rdi] + mov rbx,QWORD PTR[((-16))+rbp*1+rsi] + mov r15,rax + + mul r14 + mov r10,rax + mov rax,rbx + mov r11,rdx + mov QWORD PTR[((-24))+rbp*1+rdi],r10 + + mul r14 + add r11,rax + mov rax,rbx + adc rdx,0 + mov QWORD PTR[((-16))+rbp*1+rdi],r11 + mov r10,rdx + + + mov rbx,QWORD PTR[((-8))+rbp*1+rsi] + mul r15 + mov r12,rax + mov rax,rbx + mov r13,rdx + + lea rcx,QWORD PTR[rbp] + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD PTR[((-8))+rcx*1+rdi],r10 + jmp
$L$sqr4x_1st + +ALIGN 32 +$L$sqr4x_1st:: + mov rbx,QWORD PTR[rcx*1+rsi] + mul r15 + add r13,rax + mov rax,rbx + mov r12,rdx + adc r12,0 + + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD PTR[8+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD PTR[rcx*1+rdi],r11 + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + mov rbx,QWORD PTR[16+rcx*1+rsi] + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + + mul r15 + add r13,rax + mov rax,rbx + mov QWORD PTR[8+rcx*1+rdi],r10 + mov r12,rdx + adc r12,0 + + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD PTR[24+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD PTR[16+rcx*1+rdi],r11 + mov r13,rdx + adc r13,0 + lea rcx,QWORD PTR[32+rcx] + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD PTR[((-8))+rcx*1+rdi],r10 + + cmp rcx,0 + jne $L$sqr4x_1st + + mul r15 + add r13,rax + lea rbp,QWORD PTR[16+rbp] + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD PTR[rdi],r13 + mov r12,rdx + mov QWORD PTR[8+rdi],rdx + jmp $L$sqr4x_outer + +ALIGN 32 +$L$sqr4x_outer:: + mov r14,QWORD PTR[((-32))+rbp*1+rsi] + lea rdi,QWORD PTR[((48+8))+r9*2+rsp] + mov rax,QWORD PTR[((-24))+rbp*1+rsi] + lea rdi,QWORD PTR[((-32))+rbp*1+rdi] + mov rbx,QWORD PTR[((-16))+rbp*1+rsi] + mov r15,rax + + mul r14 + mov r10,QWORD PTR[((-24))+rbp*1+rdi] + add r10,rax + mov rax,rbx + adc rdx,0 + mov QWORD PTR[((-24))+rbp*1+rdi],r10 + mov r11,rdx + + mul r14 + add r11,rax + mov rax,rbx + adc rdx,0 + add r11,QWORD PTR[((-16))+rbp*1+rdi] + mov r10,rdx + adc r10,0 + mov QWORD PTR[((-16))+rbp*1+rdi],r11 + + xor r12,r12 + + mov rbx,QWORD PTR[((-8))+rbp*1+rsi] + mul r15 + add r12,rax + mov rax,rbx + adc rdx,0 + add r12,QWORD PTR[((-8))+rbp*1+rdi] + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + adc rdx,0 + add r10,r12 + mov r11,rdx + adc r11,0 + mov QWORD 
PTR[((-8))+rbp*1+rdi],r10 + + lea rcx,QWORD PTR[rbp] + jmp $L$sqr4x_inner + +ALIGN 32 +$L$sqr4x_inner:: + mov rbx,QWORD PTR[rcx*1+rsi] + mul r15 + add r13,rax + mov rax,rbx + mov r12,rdx + adc r12,0 + add r13,QWORD PTR[rcx*1+rdi] + adc r12,0 + +DB 067h + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD PTR[8+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + mul r15 + add r12,rax + mov QWORD PTR[rcx*1+rdi],r11 + mov rax,rbx + mov r13,rdx + adc r13,0 + add r12,QWORD PTR[8+rcx*1+rdi] + lea rcx,QWORD PTR[16+rcx] + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + adc rdx,0 + add r10,r12 + mov r11,rdx + adc r11,0 + mov QWORD PTR[((-8))+rcx*1+rdi],r10 + + cmp rcx,0 + jne $L$sqr4x_inner + +DB 067h + mul r15 + add r13,rax + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD PTR[rdi],r13 + mov r12,rdx + mov QWORD PTR[8+rdi],rdx + + add rbp,16 + jnz $L$sqr4x_outer + + + mov r14,QWORD PTR[((-32))+rsi] + lea rdi,QWORD PTR[((48+8))+r9*2+rsp] + mov rax,QWORD PTR[((-24))+rsi] + lea rdi,QWORD PTR[((-32))+rbp*1+rdi] + mov rbx,QWORD PTR[((-16))+rsi] + mov r15,rax + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + + mul r14 + add r11,rax + mov rax,rbx + mov QWORD PTR[((-24))+rdi],r10 + mov r10,rdx + adc r10,0 + add r11,r13 + mov rbx,QWORD PTR[((-8))+rsi] + adc r10,0 + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD PTR[((-16))+rdi],r11 + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD PTR[((-8))+rdi],r10 + + mul r15 + add r13,rax + mov rax,QWORD PTR[((-16))+rsi] + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD PTR[rdi],r13 + mov r12,rdx + mov QWORD PTR[8+rdi],rdx + + mul rbx + add rbp,16 + xor r14,r14 + sub rbp,r9 + xor r15,r15 + + add rax,r12 + adc rdx,0 + mov QWORD PTR[8+rdi],rax + mov QWORD PTR[16+rdi],rdx + mov QWORD PTR[24+rdi],r15 + + mov rax,QWORD PTR[((-16))+rbp*1+rsi] + lea rdi,QWORD PTR[((48+8))+rsp] + xor r10,r10 + mov r11,QWORD PTR[8+rdi] + + lea 
r12,QWORD PTR[r10*2+r14] + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[16+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[24+rdi] + adc r12,rax + mov rax,QWORD PTR[((-8))+rbp*1+rsi] + mov QWORD PTR[rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[8+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD PTR[32+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[40+rdi] + adc rbx,rax + mov rax,QWORD PTR[rbp*1+rsi] + mov QWORD PTR[16+rdi],rbx + adc r8,rdx + lea rbp,QWORD PTR[16+rbp] + mov QWORD PTR[24+rdi],r8 + sbb r15,r15 + lea rdi,QWORD PTR[64+rdi] + jmp $L$sqr4x_shift_n_add + +ALIGN 32 +$L$sqr4x_shift_n_add:: + lea r12,QWORD PTR[r10*2+r14] + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[((-16))+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[((-8))+rdi] + adc r12,rax + mov rax,QWORD PTR[((-8))+rbp*1+rsi] + mov QWORD PTR[((-32))+rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[((-24))+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD PTR[rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[8+rdi] + adc rbx,rax + mov rax,QWORD PTR[rbp*1+rsi] + mov QWORD PTR[((-16))+rdi],rbx + adc r8,rdx + + lea r12,QWORD PTR[r10*2+r14] + mov QWORD PTR[((-8))+rdi],r8 + sbb r15,r15 + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[16+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[24+rdi] + adc r12,rax + mov rax,QWORD PTR[8+rbp*1+rsi] + mov QWORD PTR[rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[8+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD PTR[32+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[40+rdi] + adc rbx,rax + mov rax,QWORD PTR[16+rbp*1+rsi] + mov QWORD 
PTR[16+rdi],rbx + adc r8,rdx + mov QWORD PTR[24+rdi],r8 + sbb r15,r15 + lea rdi,QWORD PTR[64+rdi] + add rbp,32 + jnz $L$sqr4x_shift_n_add + + lea r12,QWORD PTR[r10*2+r14] +DB 067h + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[((-16))+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[((-8))+rdi] + adc r12,rax + mov rax,QWORD PTR[((-8))+rsi] + mov QWORD PTR[((-32))+rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[((-24))+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mul rax + neg r15 + adc rbx,rax + adc r8,rdx + mov QWORD PTR[((-16))+rdi],rbx + mov QWORD PTR[((-8))+rdi],r8 +DB 102,72,15,126,213 +sqr8x_reduction:: + xor rax,rax + lea rcx,QWORD PTR[r9*2+rbp] + lea rdx,QWORD PTR[((48+8))+r9*2+rsp] + mov QWORD PTR[((0+8))+rsp],rcx + lea rdi,QWORD PTR[((48+8))+r9*1+rsp] + mov QWORD PTR[((8+8))+rsp],rdx + neg r9 + jmp $L$8x_reduction_loop + +ALIGN 32 +$L$8x_reduction_loop:: + lea rdi,QWORD PTR[r9*1+rdi] +DB 066h + mov rbx,QWORD PTR[rdi] + mov r9,QWORD PTR[8+rdi] + mov r10,QWORD PTR[16+rdi] + mov r11,QWORD PTR[24+rdi] + mov r12,QWORD PTR[32+rdi] + mov r13,QWORD PTR[40+rdi] + mov r14,QWORD PTR[48+rdi] + mov r15,QWORD PTR[56+rdi] + mov QWORD PTR[rdx],rax + lea rdi,QWORD PTR[64+rdi] + +DB 067h + mov r8,rbx + imul rbx,QWORD PTR[((32+8))+rsp] + mov rax,QWORD PTR[rbp] + mov ecx,8 + jmp $L$8x_reduce + +ALIGN 32 +$L$8x_reduce:: + mul rbx + mov rax,QWORD PTR[16+rbp] + neg r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[32+rbp] + adc rdx,0 + add r8,r9 + mov QWORD PTR[((48-8+8))+rcx*8+rsp],rbx + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[48+rbp] + adc rdx,0 + add r9,r10 + mov rsi,QWORD PTR[((32+8))+rsp] + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[64+rbp] + adc rdx,0 + imul rsi,r8 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[80+rbp] + adc rdx,0 + 
add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[96+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[112+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,rsi + add r15,rax + mov rax,QWORD PTR[rbp] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + dec ecx + jnz $L$8x_reduce + + lea rbp,QWORD PTR[128+rbp] + xor rax,rax + mov rdx,QWORD PTR[((8+8))+rsp] + cmp rbp,QWORD PTR[((0+8))+rsp] + jae $L$8x_no_tail + +DB 066h + add r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + sbb rsi,rsi + + mov rbx,QWORD PTR[((48+56+8))+rsp] + mov ecx,8 + mov rax,QWORD PTR[rbp] + jmp $L$8x_tail + +ALIGN 32 +$L$8x_tail:: + mul rbx + add r8,rax + mov rax,QWORD PTR[16+rbp] + mov QWORD PTR[rdi],r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[32+rbp] + adc rdx,0 + add r8,r9 + lea rdi,QWORD PTR[8+rdi] + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[48+rbp] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[64+rbp] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[80+rbp] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[96+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[112+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,QWORD PTR[((48-16+8))+rcx*8+rsp] + add r15,rax + adc rdx,0 + add r14,r15 + mov rax,QWORD PTR[rbp] + mov r15,rdx + adc r15,0 + + dec ecx + jnz $L$8x_tail + + lea rbp,QWORD PTR[128+rbp] + mov rdx,QWORD PTR[((8+8))+rsp] + cmp rbp,QWORD PTR[((0+8))+rsp] + jae $L$8x_tail_done + + mov rbx,QWORD PTR[((48+56+8))+rsp] + neg rsi + mov 
rax,QWORD PTR[rbp] + adc r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + sbb rsi,rsi + + mov ecx,8 + jmp $L$8x_tail + +ALIGN 32 +$L$8x_tail_done:: + add r8,QWORD PTR[rdx] + xor rax,rax + + neg rsi +$L$8x_no_tail:: + adc r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + adc rax,0 + mov rcx,QWORD PTR[((-16))+rbp] + xor rsi,rsi + +DB 102,72,15,126,213 ;movq rbp,xmm2 + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 +DB 102,73,15,126,217 ;movq r9,xmm3 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + lea rdi,QWORD PTR[64+rdi] + + cmp rdi,rdx + jb $L$8x_reduction_loop + + sub rcx,r15 + lea rbx,QWORD PTR[r9*1+rdi] + adc rsi,rsi + mov rcx,r9 + or rax,rsi +DB 102,72,15,126,207 ;movq rdi,xmm1 + xor rax,1 +DB 102,72,15,126,206 ;movq rsi,xmm1 + lea rbp,QWORD PTR[rax*8+rbp] + sar rcx,3+2 + jmp $L$sqr4x_sub + +ALIGN 32 +$L$sqr4x_sub:: ;per pass: four qwords [rdi] = [rbx] - [rbp], reading [rbp] at a 16-byte stride; rcx counts up to zero +DB 066h + mov r12,QWORD PTR[rbx] + mov r13,QWORD PTR[8+rbx] + sbb r12,QWORD PTR[rbp] + mov r14,QWORD PTR[16+rbx] + sbb r13,QWORD PTR[16+rbp] + mov r15,QWORD PTR[24+rbx] + lea rbx,QWORD PTR[32+rbx] + sbb r14,QWORD PTR[32+rbp] + mov QWORD PTR[rdi],r12 + sbb r15,QWORD PTR[48+rbp] + lea rbp,QWORD PTR[64+rbp] + mov QWORD PTR[8+rdi],r13 + mov QWORD PTR[16+rdi],r14 + mov QWORD PTR[24+rdi],r15 + lea rdi,QWORD PTR[32+rdi] + + inc rcx ;inc does not modify CF, so the borrow chain continues into the next pass + jnz $L$sqr4x_sub + mov r10,r9 + neg r9 + DB 0F3h,0C3h ;repret +bn_sqr8x_internal ENDP +PUBLIC bn_from_montgomery + +ALIGN 32 +bn_from_montgomery PROC PUBLIC ;dispatcher: tail-jumps to bn_from_mont8x when the 6th argument is a multiple of 8, otherwise returns 0 + test DWORD PTR[48+rsp],7 ;6th argument slot; low three bits must be clear + jz bn_from_mont8x + xor eax,eax ;unsupported size: return 0 + DB 0F3h,0C3h ;repret +bn_from_montgomery ENDP + + +ALIGN 32 +bn_from_mont8x PROC PRIVATE ;copies the input into a zero-extended stack buffer, runs sqr8x_reduction on it, wipes the buffer, returns rax=1 + mov QWORD PTR[8+rsp],rdi
;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_from_mont8x:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] ;5th argument (first stack slot above the 32-byte shadow space) + mov r9,QWORD PTR[48+rsp] ;6th argument + + +DB 067h + mov rax,rsp ;rax = entry rsp; every save slot below is addressed relative to it + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-40))+rsp] + movaps XMMWORD PTR[rsp],xmm6 ;xmm6 kept at entry rsp-88 + movaps XMMWORD PTR[16+rsp],xmm7 ;xmm7 kept at entry rsp-72 +DB 067h + mov r10d,r9d + shl r9d,3 ;r9 = 8*r9 + shl r10d,3+2 ;r10 = 32*(original r9d) + neg r9 + mov r8,QWORD PTR[r8] ;r8 = qword behind the 5th argument pointer + + + + + + + + lea r11,QWORD PTR[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 ;r11 = (candidate frame address - rsi) mod 4096 + cmp r10,r11 + jb $L$from_sp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + jmp $L$from_sp_done + +ALIGN 32 +$L$from_sp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*2] + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 ;mov, not xor: the carry from the sub above must survive for cmovc + cmovc r11,r10 + sub rsp,r11 +$L$from_sp_done:: + and rsp,-64 ;64-byte align the frame + mov r10,r9 + neg r9 + + + + + + + + + + + mov QWORD PTR[32+rsp],r8 ;[32+rsp] = the qword loaded through the 5th argument + mov QWORD PTR[40+rsp],rax ;[40+rsp] = entry rsp, used by the epilogue +$L$from_body:: + mov r11,r9 + lea rax,QWORD PTR[48+rsp] + pxor xmm0,xmm0 + jmp $L$mul_by_1 + +ALIGN 32 +$L$mul_by_1:: ;per pass: copy 64 bytes from [rsi] to [rax] and zero 64 bytes at [rax+r9]; r11 counts the r9 bytes down + movdqu xmm1,XMMWORD PTR[rsi] + movdqu xmm2,XMMWORD PTR[16+rsi] + movdqu xmm3,XMMWORD PTR[32+rsi] + movdqa XMMWORD PTR[r9*1+rax],xmm0 + movdqu xmm4,XMMWORD PTR[48+rsi] + movdqa XMMWORD PTR[16+r9*1+rax],xmm0 +DB 048h,08dh,0b6h,040h,000h,000h,000h ;lea rsi,[rsi+64], encoded with a 32-bit displacement + movdqa XMMWORD PTR[rax],xmm1 + movdqa XMMWORD PTR[32+r9*1+rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm2 + movdqa XMMWORD PTR[48+r9*1+rax],xmm0 + movdqa XMMWORD PTR[32+rax],xmm3 + movdqa XMMWORD PTR[48+rax],xmm4 + lea rax,QWORD PTR[64+rax] + sub r11,64 + jnz $L$mul_by_1 + +DB 102,72,15,110,207 ;movq xmm1,rdi +DB 102,72,15,110,209 ;movq xmm2,rcx +DB 067h + mov rbp,rcx +DB 102,73,15,110,218 ;movq xmm3,r10 + call sqr8x_reduction ;label inside bn_sqr8x_internal + + pxor xmm0,xmm0 + lea rax,QWORD PTR[48+rsp] + mov rsi,QWORD PTR[40+rsp] ;rsi = entry rsp + jmp $L$from_mont_zero + +ALIGN 32 +$L$from_mont_zero:: ;wipe the stack buffer from [48+rsp] upward: 64 bytes per pass while r9 drops by 32 + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + movdqa XMMWORD PTR[32+rax],xmm0 + movdqa XMMWORD PTR[48+rax],xmm0 + lea rax,QWORD PTR[64+rax] +
sub r9,32 + jnz $L$from_mont_zero + + mov rax,1 + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$from_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_from_mont8x:: +bn_from_mont8x ENDP +PUBLIC bn_scatter5 + +ALIGN 16 +bn_scatter5 PROC PUBLIC ;leaf, Win64 registers used directly: rcx=source qwords, edx=count, r8=table, r9=index; writes each source qword into the table at a 256-byte stride + cmp edx,0 + jz $L$scatter_epilogue ;nothing to do for a zero count + lea r8,QWORD PTR[r9*8+r8] ;first destination = table base offset by index*8 bytes +$L$scatter:: + mov rax,QWORD PTR[rcx] + lea rcx,QWORD PTR[8+rcx] + mov QWORD PTR[r8],rax + lea r8,QWORD PTR[256+r8] + sub edx,1 + jnz $L$scatter +$L$scatter_epilogue:: + DB 0F3h,0C3h ;repret +bn_scatter5 ENDP + +PUBLIC bn_gather5 + +ALIGN 16 +bn_gather5 PROC PUBLIC ;rcx=destination qwords, edx=count, r8=table, r9d=index; each output qword is the OR of four masked loads taken 64 bytes apart +$L$SEH_begin_bn_gather5:: + +DB 048h,083h,0ech,028h ;sub rsp,40 +DB 00fh,029h,034h,024h ;movaps [rsp],xmm6 +DB 00fh,029h,07ch,024h,010h ;movaps [rsp+16],xmm7 + mov r11d,r9d + shr r9d,3 + and r11,7 ;r11 = index AND 7 + not r9d + lea rax,QWORD PTR[$L$magic_masks] + and r9d,3 ;r9 = NOT(index SHR 3) AND 3, the starting qword within $L$magic_masks + lea r8,QWORD PTR[128+r11*8+r8] + movq xmm4,QWORD PTR[r9*8+rax] + movq xmm5,QWORD PTR[8+r9*8+rax] + movq xmm6,QWORD PTR[16+r9*8+rax] + movq xmm7,QWORD PTR[24+r9*8+rax] + jmp $L$gather +ALIGN 16 +$L$gather:: + movq xmm0,QWORD PTR[(((-128)))+r8] + movq xmm1,QWORD PTR[((-64))+r8] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[r8] + pand xmm1,xmm5 + movq xmm3,QWORD PTR[64+r8] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 +DB 067h,067h + por xmm0,xmm2 + lea r8,QWORD PTR[256+r8] + por xmm0,xmm3 + + movq QWORD PTR[rcx],xmm0 + lea rcx,QWORD PTR[8+rcx] + sub edx,1 + jnz $L$gather + movaps xmm6,XMMWORD PTR[rsp] ;restore the callee-saved xmm6/xmm7 stored by the DB-encoded prologue + movaps xmm7,XMMWORD PTR[16+rsp] + lea rsp,QWORD PTR[40+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_gather5:: +bn_gather5 ENDP +ALIGN 64 +$L$magic_masks:: + DD 0,0,0,0,0,0,-1,-1 ;eight qwords in total, only qword 3 is all-ones: four consecutive qwords read from index 0..3 contain exactly one all-ones mask + DD 0,0,0,0,0,0,0,0 +DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 ;NUL-terminated ASCII banner, begins "Montgomery Multi" +DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115 +DB
99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111 +DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79 +DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111 +DB 112,101,110,115,115,108,46,111,114,103,62,0 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +mul_handler PROC PRIVATE ;SEH language handler named in the .xdata records of the four frame-building routines; r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT. Recovers the saved nonvolatile registers into the CONTEXT, then forwards to RtlVirtualUnwind + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] ;context Rax + mov rbx,QWORD PTR[248+r8] ;context Rip + + mov rsi,QWORD PTR[8+r9] ;dispatcher ImageBase + mov r11,QWORD PTR[56+r9] ;dispatcher HandlerData: the two imagerel labels following the handler in .xdata + + mov r10d,DWORD PTR[r11] ;HandlerData[0] = body label + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail ;fault before the frame was set up: rax still holds the entry rsp + + mov rax,QWORD PTR[152+r8] ;context Rsp + + mov r10d,DWORD PTR[4+r11] ;HandlerData[1] = epilogue label + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail ;fault in the epilogue: registers already restored + + lea r10,QWORD PTR[$L$mul_epilogue] + cmp rbx,r10 + ja $L$body_40 ;Rip above $L$mul_epilogue means mul4x, power5 or from_mont8x, which keep the entry rsp at [40+rsp]; only bn_mul_mont_gather5 (below the label) keeps it at [8+r9*8+rsp] + + mov r10,QWORD PTR[192+r8] ;context R9 + mov rax,QWORD PTR[8+r10*8+rax] ;same slot the bn_mul_mont_gather5 epilogue reads + jmp $L$body_proceed + +$L$body_40:: + mov rax,QWORD PTR[40+rax] +$L$body_proceed:: + + movaps xmm0,XMMWORD PTR[((-88))+rax] ;saved xmm6 + movaps xmm1,XMMWORD PTR[((-72))+rax] ;saved xmm7 + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + movups XMMWORD PTR[512+r8],xmm0 + movups XMMWORD PTR[528+r8],xmm1 + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] ;rdi/rsi as spilled by the WIN64 prologue + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax ;context Rsp = entry rsp + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch ;bytes FC F3 48 A5 = cld, rep movsq: copies 154 qwords from the CONTEXT at r8 to the record at [40+r9] + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD
PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 ;handler result 1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +mul_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_bn_mul_mont_gather5 + DD imagerel $L$SEH_end_bn_mul_mont_gather5 + DD imagerel $L$SEH_info_bn_mul_mont_gather5 + + DD imagerel $L$SEH_begin_bn_mul4x_mont_gather5 + DD imagerel $L$SEH_end_bn_mul4x_mont_gather5 + DD imagerel $L$SEH_info_bn_mul4x_mont_gather5 + + DD imagerel $L$SEH_begin_bn_power5 + DD imagerel $L$SEH_end_bn_power5 + DD imagerel $L$SEH_info_bn_power5 + + DD imagerel $L$SEH_begin_bn_from_mont8x + DD imagerel $L$SEH_end_bn_from_mont8x + DD imagerel $L$SEH_info_bn_from_mont8x + DD imagerel $L$SEH_begin_bn_gather5 + DD imagerel $L$SEH_end_bn_gather5 + DD imagerel $L$SEH_info_bn_gather5 + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_bn_mul_mont_gather5:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$mul_body,imagerel $L$mul_epilogue +ALIGN 8 +$L$SEH_info_bn_mul4x_mont_gather5:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue +ALIGN 8 +$L$SEH_info_bn_power5:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$power5_body,imagerel $L$power5_epilogue +ALIGN 8 +$L$SEH_info_bn_from_mont8x:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$from_body,imagerel $L$from_epilogue +ALIGN 8 +$L$SEH_info_bn_gather5:: +DB 001h,00dh,005h,000h +DB 00dh,078h,001h,000h +DB 008h,068h,000h,000h +DB 004h,042h,000h,000h +ALIGN 8 + +.xdata ENDS +END diff --git a/win-x86_64/crypto/cpu-x86_64-asm.asm b/win-x86_64/crypto/cpu-x86_64-asm.asm new file mode 100644 index 0000000..dca66f5 --- /dev/null +++ b/win-x86_64/crypto/cpu-x86_64-asm.asm @@ -0,0 +1,158 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC OPENSSL_ia32_cpuid + +ALIGN 16 +OPENSSL_ia32_cpuid PROC PUBLIC + mov QWORD PTR[8+rsp],rdi
;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_OPENSSL_ia32_cpuid:: + mov rdi,rcx + + + + + mov rdi,rcx ;rdi = output pointer (1st argument); cpuid overwrites rcx + mov r8,rbx ;cpuid overwrites rbx, which is callee-saved: park it in r8 + + xor eax,eax + mov DWORD PTR[8+rdi],eax ;third output dword defaults to 0 + cpuid ;leaf 0 + mov r11d,eax ;r11d = highest basic leaf + + xor eax,eax + cmp ebx,0756e6547h ;"Genu" + setne al + mov r9d,eax + cmp edx,049656e69h ;"ineI" + setne al + or r9d,eax + cmp ecx,06c65746eh ;"ntel" + setne al + or r9d,eax ;r9d = 0 only for the GenuineIntel vendor string + jz $L$intel + + cmp ebx,068747541h ;"Auth" + setne al + mov r10d,eax + cmp edx,069746E65h ;"enti" + setne al + or r10d,eax + cmp ecx,0444D4163h ;"cAMD" + setne al + or r10d,eax + jnz $L$intel ;not AuthenticAMD either: take the generic/Intel path + + + + + mov eax,080000000h + cpuid ;eax = highest extended leaf + + + cmp eax,080000001h + jb $L$intel + mov r10d,eax + mov eax,080000001h + cpuid + + + or r9d,ecx + and r9d,000000801h ;keep bits 0 and 11 of (vendor flag OR extended ecx) + + cmp r10d,080000008h + jb $L$intel + + mov eax,080000008h + cpuid + + movzx r10,cl + inc r10 ;r10 = low byte of leaf 80000008h ecx, plus one + + mov eax,1 + cpuid + + bt edx,28 + jnc $L$generic + shr ebx,16 + cmp bl,r10b + ja $L$generic + and edx,0efffffffh ;clear edx bit 28 + jmp $L$generic + +$L$intel:: + cmp r11d,4 + mov r10d,-1 ;mov does not alter the flags set by the cmp above + jb $L$nocacheinfo + + mov eax,4 + mov ecx,0 + cpuid + mov r10d,eax + shr r10d,14 + and r10d,0fffh ;r10d = bits 14..25 of leaf 4 eax + + cmp r11d,7 + jb $L$nocacheinfo + + mov eax,7 + xor ecx,ecx + cpuid + mov DWORD PTR[8+rdi],ebx ;third output dword = leaf 7 ebx + +$L$nocacheinfo:: + mov eax,1 + cpuid + + and edx,0bfefffffh ;clear edx bits 20 and 30 before they are reused as synthetic flags + cmp r9d,0 + jne $L$notintel + or edx,040000000h ;bit 30 set on the r9d = 0 (Intel) path + and ah,15 + cmp ah,15 + jne $L$notintel + or edx,000100000h ;bit 20 set when bits 8..11 of leaf 1 eax are all ones +$L$notintel:: + bt edx,28 + jnc $L$generic + and edx,0efffffffh + cmp r10d,0 + je $L$generic + + or edx,010000000h + shr ebx,16 + cmp bl,1 + ja $L$generic + and edx,0efffffffh +$L$generic:: + and r9d,000000800h + and ecx,0fffff7ffh + or r9d,ecx ;r9d = leaf 1 ecx with bit 11 replaced by the value gathered above + + mov r10d,edx + bt r9d,27 + jnc $L$clear_avx + xor ecx,ecx +DB 00fh,001h,0d0h ;xgetbv with ecx = 0 + and eax,6 + cmp eax,6 + je $L$done ;bits 1 and 2 both set: keep the flags as they are +$L$clear_avx:: + mov eax,0efffe7ffh + and r9d,eax ;clear bits 11, 12 and 28 of the second output dword + and DWORD PTR[8+rdi],0ffffffdfh ;clear bit 5 of the third output dword +$L$done:: + mov DWORD PTR[4+rdi],r9d + mov DWORD PTR[rdi],r10d + mov rbx,r8 ;restore rbx + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_OPENSSL_ia32_cpuid::
+OPENSSL_ia32_cpuid ENDP + + +.text$ ENDS +END diff --git a/win-x86_64/crypto/md5/md5-x86_64.asm b/win-x86_64/crypto/md5/md5-x86_64.asm new file mode 100644 index 0000000..d2faa88 --- /dev/null +++ b/win-x86_64/crypto/md5/md5-x86_64.asm @@ -0,0 +1,778 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' +ALIGN 16 + +PUBLIC md5_block_asm_data_order + +md5_block_asm_data_order PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_md5_block_asm_data_order:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbp + push rbx + push r12 + push r14 + push r15 +$L$prologue:: + + + + + mov rbp,rdi + shl rdx,6 + lea rdi,QWORD PTR[rdx*1+rsi] + mov eax,DWORD PTR[rbp] + mov ebx,DWORD PTR[4+rbp] + mov ecx,DWORD PTR[8+rbp] + mov edx,DWORD PTR[12+rbp] + + + + + + + + cmp rsi,rdi + je $L$end + + +$L$loop:: + mov r8d,eax + mov r9d,ebx + mov r14d,ecx + mov r15d,edx + mov r10d,DWORD PTR[rsi] + mov r11d,edx + xor r11d,ecx + lea eax,DWORD PTR[((-680876936))+r10*1+rax] + and r11d,ebx + xor r11d,edx + mov r10d,DWORD PTR[4+rsi] + add eax,r11d + rol eax,7 + mov r11d,ecx + add eax,ebx + xor r11d,ebx + lea edx,DWORD PTR[((-389564586))+r10*1+rdx] + and r11d,eax + xor r11d,ecx + mov r10d,DWORD PTR[8+rsi] + add edx,r11d + rol edx,12 + mov r11d,ebx + add edx,eax + xor r11d,eax + lea ecx,DWORD PTR[606105819+r10*1+rcx] + and r11d,edx + xor r11d,ebx + mov r10d,DWORD PTR[12+rsi] + add ecx,r11d + rol ecx,17 + mov r11d,eax + add ecx,edx + xor r11d,edx + lea ebx,DWORD PTR[((-1044525330))+r10*1+rbx] + and r11d,ecx + xor r11d,eax + mov r10d,DWORD PTR[16+rsi] + add ebx,r11d + rol ebx,22 + mov r11d,edx + add ebx,ecx + xor r11d,ecx + lea eax,DWORD PTR[((-176418897))+r10*1+rax] + and r11d,ebx + xor r11d,edx + mov r10d,DWORD PTR[20+rsi] + add eax,r11d + rol eax,7 + mov r11d,ecx + add eax,ebx + xor r11d,ebx + lea edx,DWORD PTR[1200080426+r10*1+rdx] + and r11d,eax + xor r11d,ecx + mov r10d,DWORD PTR[24+rsi] + add edx,r11d + rol edx,12 + mov r11d,ebx + add 
edx,eax + xor r11d,eax + lea ecx,DWORD PTR[((-1473231341))+r10*1+rcx] + and r11d,edx + xor r11d,ebx + mov r10d,DWORD PTR[28+rsi] + add ecx,r11d + rol ecx,17 + mov r11d,eax + add ecx,edx + xor r11d,edx + lea ebx,DWORD PTR[((-45705983))+r10*1+rbx] + and r11d,ecx + xor r11d,eax + mov r10d,DWORD PTR[32+rsi] + add ebx,r11d + rol ebx,22 + mov r11d,edx + add ebx,ecx + xor r11d,ecx + lea eax,DWORD PTR[1770035416+r10*1+rax] + and r11d,ebx + xor r11d,edx + mov r10d,DWORD PTR[36+rsi] + add eax,r11d + rol eax,7 + mov r11d,ecx + add eax,ebx + xor r11d,ebx + lea edx,DWORD PTR[((-1958414417))+r10*1+rdx] + and r11d,eax + xor r11d,ecx + mov r10d,DWORD PTR[40+rsi] + add edx,r11d + rol edx,12 + mov r11d,ebx + add edx,eax + xor r11d,eax + lea ecx,DWORD PTR[((-42063))+r10*1+rcx] + and r11d,edx + xor r11d,ebx + mov r10d,DWORD PTR[44+rsi] + add ecx,r11d + rol ecx,17 + mov r11d,eax + add ecx,edx + xor r11d,edx + lea ebx,DWORD PTR[((-1990404162))+r10*1+rbx] + and r11d,ecx + xor r11d,eax + mov r10d,DWORD PTR[48+rsi] + add ebx,r11d + rol ebx,22 + mov r11d,edx + add ebx,ecx + xor r11d,ecx + lea eax,DWORD PTR[1804603682+r10*1+rax] + and r11d,ebx + xor r11d,edx + mov r10d,DWORD PTR[52+rsi] + add eax,r11d + rol eax,7 + mov r11d,ecx + add eax,ebx + xor r11d,ebx + lea edx,DWORD PTR[((-40341101))+r10*1+rdx] + and r11d,eax + xor r11d,ecx + mov r10d,DWORD PTR[56+rsi] + add edx,r11d + rol edx,12 + mov r11d,ebx + add edx,eax + xor r11d,eax + lea ecx,DWORD PTR[((-1502002290))+r10*1+rcx] + and r11d,edx + xor r11d,ebx + mov r10d,DWORD PTR[60+rsi] + add ecx,r11d + rol ecx,17 + mov r11d,eax + add ecx,edx + xor r11d,edx + lea ebx,DWORD PTR[1236535329+r10*1+rbx] + and r11d,ecx + xor r11d,eax + mov r10d,DWORD PTR[rsi] + add ebx,r11d + rol ebx,22 + mov r11d,edx + add ebx,ecx + mov r10d,DWORD PTR[4+rsi] + mov r11d,edx + mov r12d,edx + not r11d + lea eax,DWORD PTR[((-165796510))+r10*1+rax] + and r12d,ebx + and r11d,ecx + mov r10d,DWORD PTR[24+rsi] + or r12d,r11d + mov r11d,ecx + add eax,r12d + mov r12d,ecx + rol 
eax,5 + add eax,ebx + not r11d + lea edx,DWORD PTR[((-1069501632))+r10*1+rdx] + and r12d,eax + and r11d,ebx + mov r10d,DWORD PTR[44+rsi] + or r12d,r11d + mov r11d,ebx + add edx,r12d + mov r12d,ebx + rol edx,9 + add edx,eax + not r11d + lea ecx,DWORD PTR[643717713+r10*1+rcx] + and r12d,edx + and r11d,eax + mov r10d,DWORD PTR[rsi] + or r12d,r11d + mov r11d,eax + add ecx,r12d + mov r12d,eax + rol ecx,14 + add ecx,edx + not r11d + lea ebx,DWORD PTR[((-373897302))+r10*1+rbx] + and r12d,ecx + and r11d,edx + mov r10d,DWORD PTR[20+rsi] + or r12d,r11d + mov r11d,edx + add ebx,r12d + mov r12d,edx + rol ebx,20 + add ebx,ecx + not r11d + lea eax,DWORD PTR[((-701558691))+r10*1+rax] + and r12d,ebx + and r11d,ecx + mov r10d,DWORD PTR[40+rsi] + or r12d,r11d + mov r11d,ecx + add eax,r12d + mov r12d,ecx + rol eax,5 + add eax,ebx + not r11d + lea edx,DWORD PTR[38016083+r10*1+rdx] + and r12d,eax + and r11d,ebx + mov r10d,DWORD PTR[60+rsi] + or r12d,r11d + mov r11d,ebx + add edx,r12d + mov r12d,ebx + rol edx,9 + add edx,eax + not r11d + lea ecx,DWORD PTR[((-660478335))+r10*1+rcx] + and r12d,edx + and r11d,eax + mov r10d,DWORD PTR[16+rsi] + or r12d,r11d + mov r11d,eax + add ecx,r12d + mov r12d,eax + rol ecx,14 + add ecx,edx + not r11d + lea ebx,DWORD PTR[((-405537848))+r10*1+rbx] + and r12d,ecx + and r11d,edx + mov r10d,DWORD PTR[36+rsi] + or r12d,r11d + mov r11d,edx + add ebx,r12d + mov r12d,edx + rol ebx,20 + add ebx,ecx + not r11d + lea eax,DWORD PTR[568446438+r10*1+rax] + and r12d,ebx + and r11d,ecx + mov r10d,DWORD PTR[56+rsi] + or r12d,r11d + mov r11d,ecx + add eax,r12d + mov r12d,ecx + rol eax,5 + add eax,ebx + not r11d + lea edx,DWORD PTR[((-1019803690))+r10*1+rdx] + and r12d,eax + and r11d,ebx + mov r10d,DWORD PTR[12+rsi] + or r12d,r11d + mov r11d,ebx + add edx,r12d + mov r12d,ebx + rol edx,9 + add edx,eax + not r11d + lea ecx,DWORD PTR[((-187363961))+r10*1+rcx] + and r12d,edx + and r11d,eax + mov r10d,DWORD PTR[32+rsi] + or r12d,r11d + mov r11d,eax + add ecx,r12d + mov 
r12d,eax + rol ecx,14 + add ecx,edx + not r11d + lea ebx,DWORD PTR[1163531501+r10*1+rbx] + and r12d,ecx + and r11d,edx + mov r10d,DWORD PTR[52+rsi] + or r12d,r11d + mov r11d,edx + add ebx,r12d + mov r12d,edx + rol ebx,20 + add ebx,ecx + not r11d + lea eax,DWORD PTR[((-1444681467))+r10*1+rax] + and r12d,ebx + and r11d,ecx + mov r10d,DWORD PTR[8+rsi] + or r12d,r11d + mov r11d,ecx + add eax,r12d + mov r12d,ecx + rol eax,5 + add eax,ebx + not r11d + lea edx,DWORD PTR[((-51403784))+r10*1+rdx] + and r12d,eax + and r11d,ebx + mov r10d,DWORD PTR[28+rsi] + or r12d,r11d + mov r11d,ebx + add edx,r12d + mov r12d,ebx + rol edx,9 + add edx,eax + not r11d + lea ecx,DWORD PTR[1735328473+r10*1+rcx] + and r12d,edx + and r11d,eax + mov r10d,DWORD PTR[48+rsi] + or r12d,r11d + mov r11d,eax + add ecx,r12d + mov r12d,eax + rol ecx,14 + add ecx,edx + not r11d + lea ebx,DWORD PTR[((-1926607734))+r10*1+rbx] + and r12d,ecx + and r11d,edx + mov r10d,DWORD PTR[rsi] + or r12d,r11d + mov r11d,edx + add ebx,r12d + mov r12d,edx + rol ebx,20 + add ebx,ecx + mov r10d,DWORD PTR[20+rsi] + mov r11d,ecx + lea eax,DWORD PTR[((-378558))+r10*1+rax] + mov r10d,DWORD PTR[32+rsi] + xor r11d,edx + xor r11d,ebx + add eax,r11d + rol eax,4 + mov r11d,ebx + add eax,ebx + lea edx,DWORD PTR[((-2022574463))+r10*1+rdx] + mov r10d,DWORD PTR[44+rsi] + xor r11d,ecx + xor r11d,eax + add edx,r11d + rol edx,11 + mov r11d,eax + add edx,eax + lea ecx,DWORD PTR[1839030562+r10*1+rcx] + mov r10d,DWORD PTR[56+rsi] + xor r11d,ebx + xor r11d,edx + add ecx,r11d + rol ecx,16 + mov r11d,edx + add ecx,edx + lea ebx,DWORD PTR[((-35309556))+r10*1+rbx] + mov r10d,DWORD PTR[4+rsi] + xor r11d,eax + xor r11d,ecx + add ebx,r11d + rol ebx,23 + mov r11d,ecx + add ebx,ecx + lea eax,DWORD PTR[((-1530992060))+r10*1+rax] + mov r10d,DWORD PTR[16+rsi] + xor r11d,edx + xor r11d,ebx + add eax,r11d + rol eax,4 + mov r11d,ebx + add eax,ebx + lea edx,DWORD PTR[1272893353+r10*1+rdx] + mov r10d,DWORD PTR[28+rsi] + xor r11d,ecx + xor r11d,eax + add edx,r11d 
+ rol edx,11 + mov r11d,eax + add edx,eax + lea ecx,DWORD PTR[((-155497632))+r10*1+rcx] + mov r10d,DWORD PTR[40+rsi] + xor r11d,ebx + xor r11d,edx + add ecx,r11d + rol ecx,16 + mov r11d,edx + add ecx,edx + lea ebx,DWORD PTR[((-1094730640))+r10*1+rbx] + mov r10d,DWORD PTR[52+rsi] + xor r11d,eax + xor r11d,ecx + add ebx,r11d + rol ebx,23 + mov r11d,ecx + add ebx,ecx + lea eax,DWORD PTR[681279174+r10*1+rax] + mov r10d,DWORD PTR[rsi] + xor r11d,edx + xor r11d,ebx + add eax,r11d + rol eax,4 + mov r11d,ebx + add eax,ebx + lea edx,DWORD PTR[((-358537222))+r10*1+rdx] + mov r10d,DWORD PTR[12+rsi] + xor r11d,ecx + xor r11d,eax + add edx,r11d + rol edx,11 + mov r11d,eax + add edx,eax + lea ecx,DWORD PTR[((-722521979))+r10*1+rcx] + mov r10d,DWORD PTR[24+rsi] + xor r11d,ebx + xor r11d,edx + add ecx,r11d + rol ecx,16 + mov r11d,edx + add ecx,edx + lea ebx,DWORD PTR[76029189+r10*1+rbx] + mov r10d,DWORD PTR[36+rsi] + xor r11d,eax + xor r11d,ecx + add ebx,r11d + rol ebx,23 + mov r11d,ecx + add ebx,ecx + lea eax,DWORD PTR[((-640364487))+r10*1+rax] + mov r10d,DWORD PTR[48+rsi] + xor r11d,edx + xor r11d,ebx + add eax,r11d + rol eax,4 + mov r11d,ebx + add eax,ebx + lea edx,DWORD PTR[((-421815835))+r10*1+rdx] + mov r10d,DWORD PTR[60+rsi] + xor r11d,ecx + xor r11d,eax + add edx,r11d + rol edx,11 + mov r11d,eax + add edx,eax + lea ecx,DWORD PTR[530742520+r10*1+rcx] + mov r10d,DWORD PTR[8+rsi] + xor r11d,ebx + xor r11d,edx + add ecx,r11d + rol ecx,16 + mov r11d,edx + add ecx,edx + lea ebx,DWORD PTR[((-995338651))+r10*1+rbx] + mov r10d,DWORD PTR[rsi] + xor r11d,eax + xor r11d,ecx + add ebx,r11d + rol ebx,23 + mov r11d,ecx + add ebx,ecx + mov r10d,DWORD PTR[rsi] + mov r11d,0ffffffffh + xor r11d,edx + lea eax,DWORD PTR[((-198630844))+r10*1+rax] + or r11d,ebx + xor r11d,ecx + add eax,r11d + mov r10d,DWORD PTR[28+rsi] + mov r11d,0ffffffffh + rol eax,6 + xor r11d,ecx + add eax,ebx + lea edx,DWORD PTR[1126891415+r10*1+rdx] + or r11d,eax + xor r11d,ebx + add edx,r11d + mov r10d,DWORD PTR[56+rsi] + 
mov r11d,0ffffffffh + rol edx,10 + xor r11d,ebx + add edx,eax + lea ecx,DWORD PTR[((-1416354905))+r10*1+rcx] + or r11d,edx + xor r11d,eax + add ecx,r11d + mov r10d,DWORD PTR[20+rsi] + mov r11d,0ffffffffh + rol ecx,15 + xor r11d,eax + add ecx,edx + lea ebx,DWORD PTR[((-57434055))+r10*1+rbx] + or r11d,ecx + xor r11d,edx + add ebx,r11d + mov r10d,DWORD PTR[48+rsi] + mov r11d,0ffffffffh + rol ebx,21 + xor r11d,edx + add ebx,ecx + lea eax,DWORD PTR[1700485571+r10*1+rax] + or r11d,ebx + xor r11d,ecx + add eax,r11d + mov r10d,DWORD PTR[12+rsi] + mov r11d,0ffffffffh + rol eax,6 + xor r11d,ecx + add eax,ebx + lea edx,DWORD PTR[((-1894986606))+r10*1+rdx] + or r11d,eax + xor r11d,ebx + add edx,r11d + mov r10d,DWORD PTR[40+rsi] + mov r11d,0ffffffffh + rol edx,10 + xor r11d,ebx + add edx,eax + lea ecx,DWORD PTR[((-1051523))+r10*1+rcx] + or r11d,edx + xor r11d,eax + add ecx,r11d + mov r10d,DWORD PTR[4+rsi] + mov r11d,0ffffffffh + rol ecx,15 + xor r11d,eax + add ecx,edx + lea ebx,DWORD PTR[((-2054922799))+r10*1+rbx] + or r11d,ecx + xor r11d,edx + add ebx,r11d + mov r10d,DWORD PTR[32+rsi] + mov r11d,0ffffffffh + rol ebx,21 + xor r11d,edx + add ebx,ecx + lea eax,DWORD PTR[1873313359+r10*1+rax] + or r11d,ebx + xor r11d,ecx + add eax,r11d + mov r10d,DWORD PTR[60+rsi] + mov r11d,0ffffffffh + rol eax,6 + xor r11d,ecx + add eax,ebx + lea edx,DWORD PTR[((-30611744))+r10*1+rdx] + or r11d,eax + xor r11d,ebx + add edx,r11d + mov r10d,DWORD PTR[24+rsi] + mov r11d,0ffffffffh + rol edx,10 + xor r11d,ebx + add edx,eax + lea ecx,DWORD PTR[((-1560198380))+r10*1+rcx] + or r11d,edx + xor r11d,eax + add ecx,r11d + mov r10d,DWORD PTR[52+rsi] + mov r11d,0ffffffffh + rol ecx,15 + xor r11d,eax + add ecx,edx + lea ebx,DWORD PTR[1309151649+r10*1+rbx] + or r11d,ecx + xor r11d,edx + add ebx,r11d + mov r10d,DWORD PTR[16+rsi] + mov r11d,0ffffffffh + rol ebx,21 + xor r11d,edx + add ebx,ecx + lea eax,DWORD PTR[((-145523070))+r10*1+rax] + or r11d,ebx + xor r11d,ecx + add eax,r11d + mov r10d,DWORD PTR[44+rsi] + 
mov r11d,0ffffffffh + rol eax,6 + xor r11d,ecx + add eax,ebx + lea edx,DWORD PTR[((-1120210379))+r10*1+rdx] + or r11d,eax + xor r11d,ebx + add edx,r11d + mov r10d,DWORD PTR[8+rsi] + mov r11d,0ffffffffh + rol edx,10 + xor r11d,ebx + add edx,eax + lea ecx,DWORD PTR[718787259+r10*1+rcx] + or r11d,edx + xor r11d,eax + add ecx,r11d + mov r10d,DWORD PTR[36+rsi] + mov r11d,0ffffffffh + rol ecx,15 + xor r11d,eax + add ecx,edx + lea ebx,DWORD PTR[((-343485551))+r10*1+rbx] + or r11d,ecx + xor r11d,edx + add ebx,r11d + mov r10d,DWORD PTR[rsi] + mov r11d,0ffffffffh + rol ebx,21 + xor r11d,edx + add ebx,ecx + + add eax,r8d + add ebx,r9d + add ecx,r14d + add edx,r15d + + + add rsi,64 + cmp rsi,rdi + jb $L$loop + + +$L$end:: + mov DWORD PTR[rbp],eax + mov DWORD PTR[4+rbp],ebx + mov DWORD PTR[8+rbp],ecx + mov DWORD PTR[12+rbp],edx + + mov r15,QWORD PTR[rsp] + mov r14,QWORD PTR[8+rsp] + mov r12,QWORD PTR[16+rsp] + mov rbx,QWORD PTR[24+rsp] + mov rbp,QWORD PTR[32+rsp] + add rsp,40 +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_md5_block_asm_data_order:: +md5_block_asm_data_order ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$prologue] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jae $L$in_prologue + + lea rax,QWORD PTR[40+rax] + + mov rbp,QWORD PTR[((-8))+rax] + mov rbx,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r14,QWORD PTR[((-32))+rax] + mov r15,QWORD PTR[((-40))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD 
PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_md5_block_asm_data_order + DD imagerel $L$SEH_end_md5_block_asm_data_order + DD imagerel $L$SEH_info_md5_block_asm_data_order + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_md5_block_asm_data_order:: +DB 9,0,0,0 + DD imagerel se_handler + +.xdata ENDS +END diff --git a/win-x86_64/crypto/modes/aesni-gcm-x86_64.asm b/win-x86_64/crypto/modes/aesni-gcm-x86_64.asm new file mode 100644 index 0000000..828be8d --- /dev/null +++ b/win-x86_64/crypto/modes/aesni-gcm-x86_64.asm @@ -0,0 +1,19 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC aesni_gcm_encrypt + +aesni_gcm_encrypt PROC PUBLIC + xor eax,eax + DB 0F3h,0C3h ;repret +aesni_gcm_encrypt ENDP + +PUBLIC aesni_gcm_decrypt + +aesni_gcm_decrypt PROC PUBLIC + xor eax,eax + DB 0F3h,0C3h ;repret +aesni_gcm_decrypt ENDP + +.text$ ENDS +END diff --git a/win-x86_64/crypto/modes/ghash-x86_64.asm b/win-x86_64/crypto/modes/ghash-x86_64.asm new file mode 100644 index 0000000..9993d75 --- /dev/null +++ b/win-x86_64/crypto/modes/ghash-x86_64.asm @@ -0,0 +1,1510 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC gcm_gmult_4bit + +ALIGN 16 +gcm_gmult_4bit PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov 
rax,rsp +$L$SEH_begin_gcm_gmult_4bit:: + mov rdi,rcx + mov rsi,rdx + + + push rbx + push rbp + push r12 +$L$gmult_prologue:: + + movzx r8,BYTE PTR[15+rdi] + lea r11,QWORD PTR[$L$rem_4bit] + xor rax,rax + xor rbx,rbx + mov al,r8b + mov bl,r8b + shl al,4 + mov rcx,14 + mov r8,QWORD PTR[8+rax*1+rsi] + mov r9,QWORD PTR[rax*1+rsi] + and bl,0f0h + mov rdx,r8 + jmp $L$oop1 + +ALIGN 16 +$L$oop1:: + shr r8,4 + and rdx,0fh + mov r10,r9 + mov al,BYTE PTR[rcx*1+rdi] + shr r9,4 + xor r8,QWORD PTR[8+rbx*1+rsi] + shl r10,60 + xor r9,QWORD PTR[rbx*1+rsi] + mov bl,al + xor r9,QWORD PTR[rdx*8+r11] + mov rdx,r8 + shl al,4 + xor r8,r10 + dec rcx + js $L$break1 + + shr r8,4 + and rdx,0fh + mov r10,r9 + shr r9,4 + xor r8,QWORD PTR[8+rax*1+rsi] + shl r10,60 + xor r9,QWORD PTR[rax*1+rsi] + and bl,0f0h + xor r9,QWORD PTR[rdx*8+r11] + mov rdx,r8 + xor r8,r10 + jmp $L$oop1 + +ALIGN 16 +$L$break1:: + shr r8,4 + and rdx,0fh + mov r10,r9 + shr r9,4 + xor r8,QWORD PTR[8+rax*1+rsi] + shl r10,60 + xor r9,QWORD PTR[rax*1+rsi] + and bl,0f0h + xor r9,QWORD PTR[rdx*8+r11] + mov rdx,r8 + xor r8,r10 + + shr r8,4 + and rdx,0fh + mov r10,r9 + shr r9,4 + xor r8,QWORD PTR[8+rbx*1+rsi] + shl r10,60 + xor r9,QWORD PTR[rbx*1+rsi] + xor r8,r10 + xor r9,QWORD PTR[rdx*8+r11] + + bswap r8 + bswap r9 + mov QWORD PTR[8+rdi],r8 + mov QWORD PTR[rdi],r9 + + mov rbx,QWORD PTR[16+rsp] + lea rsp,QWORD PTR[24+rsp] +$L$gmult_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_gcm_gmult_4bit:: +gcm_gmult_4bit ENDP +PUBLIC gcm_ghash_4bit + +ALIGN 16 +gcm_ghash_4bit PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_gcm_ghash_4bit:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + sub rsp,280 +$L$ghash_prologue:: + mov r14,rdx + mov r15,rcx + sub rsi,-128 + lea rbp,QWORD PTR[((16+128))+rsp] + xor edx,edx + mov r8,QWORD 
PTR[((0+0-128))+rsi] + mov rax,QWORD PTR[((0+8-128))+rsi] + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov r9,QWORD PTR[((16+0-128))+rsi] + shl dl,4 + mov rbx,QWORD PTR[((16+8-128))+rsi] + shl r10,60 + mov BYTE PTR[rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[rbp],r8 + mov r8,QWORD PTR[((32+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((0-128))+rbp],rax + mov rax,QWORD PTR[((32+8-128))+rsi] + shl r10,60 + mov BYTE PTR[1+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[8+rbp],r9 + mov r9,QWORD PTR[((48+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((8-128))+rbp],rbx + mov rbx,QWORD PTR[((48+8-128))+rsi] + shl r10,60 + mov BYTE PTR[2+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[16+rbp],r8 + mov r8,QWORD PTR[((64+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((16-128))+rbp],rax + mov rax,QWORD PTR[((64+8-128))+rsi] + shl r10,60 + mov BYTE PTR[3+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[24+rbp],r9 + mov r9,QWORD PTR[((80+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((24-128))+rbp],rbx + mov rbx,QWORD PTR[((80+8-128))+rsi] + shl r10,60 + mov BYTE PTR[4+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[32+rbp],r8 + mov r8,QWORD PTR[((96+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((32-128))+rbp],rax + mov rax,QWORD PTR[((96+8-128))+rsi] + shl r10,60 + mov BYTE PTR[5+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[40+rbp],r9 + mov r9,QWORD PTR[((112+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((40-128))+rbp],rbx + mov rbx,QWORD PTR[((112+8-128))+rsi] + shl r10,60 + mov BYTE PTR[6+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[48+rbp],r8 + mov r8,QWORD PTR[((128+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((48-128))+rbp],rax + mov rax,QWORD PTR[((128+8-128))+rsi] + shl r10,60 + mov BYTE PTR[7+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 
+ mov r10,r8 + shr r8,4 + mov QWORD PTR[56+rbp],r9 + mov r9,QWORD PTR[((144+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((56-128))+rbp],rbx + mov rbx,QWORD PTR[((144+8-128))+rsi] + shl r10,60 + mov BYTE PTR[8+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[64+rbp],r8 + mov r8,QWORD PTR[((160+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((64-128))+rbp],rax + mov rax,QWORD PTR[((160+8-128))+rsi] + shl r10,60 + mov BYTE PTR[9+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[72+rbp],r9 + mov r9,QWORD PTR[((176+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((72-128))+rbp],rbx + mov rbx,QWORD PTR[((176+8-128))+rsi] + shl r10,60 + mov BYTE PTR[10+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[80+rbp],r8 + mov r8,QWORD PTR[((192+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((80-128))+rbp],rax + mov rax,QWORD PTR[((192+8-128))+rsi] + shl r10,60 + mov BYTE PTR[11+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[88+rbp],r9 + mov r9,QWORD PTR[((208+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((88-128))+rbp],rbx + mov rbx,QWORD PTR[((208+8-128))+rsi] + shl r10,60 + mov BYTE PTR[12+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[96+rbp],r8 + mov r8,QWORD PTR[((224+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((96-128))+rbp],rax + mov rax,QWORD PTR[((224+8-128))+rsi] + shl r10,60 + mov BYTE PTR[13+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[104+rbp],r9 + mov r9,QWORD PTR[((240+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((104-128))+rbp],rbx + mov rbx,QWORD PTR[((240+8-128))+rsi] + shl r10,60 + mov BYTE PTR[14+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[112+rbp],r8 + shl dl,4 + mov QWORD PTR[((112-128))+rbp],rax + shl r10,60 + mov BYTE PTR[15+rsp],dl + or rbx,r10 + mov QWORD PTR[120+rbp],r9 + mov QWORD PTR[((120-128))+rbp],rbx + add rsi,-128 + mov 
r8,QWORD PTR[8+rdi] + mov r9,QWORD PTR[rdi] + add r15,r14 + lea r11,QWORD PTR[$L$rem_8bit] + jmp $L$outer_loop +ALIGN 16 +$L$outer_loop:: + xor r9,QWORD PTR[r14] + mov rdx,QWORD PTR[8+r14] + lea r14,QWORD PTR[16+r14] + xor rdx,r8 + mov QWORD PTR[rdi],r9 + mov QWORD PTR[8+rdi],rdx + shr rdx,32 + xor rax,rax + rol edx,8 + mov al,dl + movzx ebx,dl + shl al,4 + shr ebx,4 + rol edx,8 + mov r8,QWORD PTR[8+rax*1+rsi] + mov r9,QWORD PTR[rax*1+rsi] + mov al,dl + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + xor r12,r8 + mov r10,r9 + shr r8,8 + movzx r12,r12b + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + mov edx,DWORD PTR[8+rdi] + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE 
PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + mov edx,DWORD PTR[4+rdi] + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx 
r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + mov edx,DWORD PTR[rdi] + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + and ecx,240 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + mov edx,DWORD PTR[((-4))+rdi] + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl 
r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + movzx r12,WORD PTR[r12*2+r11] + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + shl r12,48 + xor r8,r10 + xor r9,r12 + movzx r13,r8b + shr r8,4 + mov r10,r9 + shl r13b,4 + shr r9,4 + xor r8,QWORD PTR[8+rcx*1+rsi] + movzx r13,WORD PTR[r13*2+r11] + shl r10,60 + xor r9,QWORD PTR[rcx*1+rsi] + xor r8,r10 + shl r13,48 + bswap r8 + xor r9,r13 + bswap r9 + cmp r14,r15 + jb $L$outer_loop + mov QWORD PTR[8+rdi],r8 + mov QWORD PTR[rdi],r9 + + lea rsi,QWORD PTR[280+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$ghash_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_gcm_ghash_4bit:: +gcm_ghash_4bit ENDP +PUBLIC gcm_init_clmul + +ALIGN 16 +gcm_init_clmul PROC PUBLIC +$L$_init_clmul:: +$L$SEH_begin_gcm_init_clmul:: + +DB 048h,083h,0ech,018h +DB 00fh,029h,034h,024h + movdqu xmm2,XMMWORD PTR[rdx] + pshufd xmm2,xmm2,78 + + + pshufd xmm4,xmm2,255 + movdqa xmm3,xmm2 + psllq xmm2,1 + pxor xmm5,xmm5 + psrlq xmm3,63 + pcmpgtd xmm5,xmm4 + pslldq xmm3,8 + por xmm2,xmm3 + + + pand xmm5,XMMWORD PTR[$L$0x1c2_polynomial] + pxor xmm2,xmm5 + + + pshufd xmm6,xmm2,78 + movdqa xmm0,xmm2 + pxor xmm6,xmm2 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,222,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + pshufd xmm3,xmm2,78 + pshufd xmm4,xmm0,78 + 
pxor xmm3,xmm2 + movdqu XMMWORD PTR[rcx],xmm2 + pxor xmm4,xmm0 + movdqu XMMWORD PTR[16+rcx],xmm0 +DB 102,15,58,15,227,8 + movdqu XMMWORD PTR[32+rcx],xmm4 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,222,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + movdqa xmm5,xmm0 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,222,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + pshufd xmm3,xmm5,78 + pshufd xmm4,xmm0,78 + pxor xmm3,xmm5 + movdqu XMMWORD PTR[48+rcx],xmm5 + pxor xmm4,xmm0 + movdqu XMMWORD PTR[64+rcx],xmm0 +DB 102,15,58,15,227,8 + movdqu XMMWORD PTR[80+rcx],xmm4 + movaps xmm6,XMMWORD PTR[rsp] + lea rsp,QWORD PTR[24+rsp] +$L$SEH_end_gcm_init_clmul:: + DB 0F3h,0C3h ;repret +gcm_init_clmul ENDP +PUBLIC gcm_gmult_clmul + +ALIGN 16 +gcm_gmult_clmul PROC PUBLIC +$L$_gmult_clmul:: + movdqu xmm0,XMMWORD PTR[rcx] + movdqa xmm5,XMMWORD PTR[$L$bswap_mask] + movdqu xmm2,XMMWORD PTR[rdx] + movdqu xmm4,XMMWORD PTR[32+rdx] +DB 102,15,56,0,197 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 
102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,220,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +DB 102,15,56,0,197 + movdqu XMMWORD PTR[rcx],xmm0 + DB 0F3h,0C3h ;repret +gcm_gmult_clmul ENDP +PUBLIC gcm_ghash_clmul + +ALIGN 32 +gcm_ghash_clmul PROC PUBLIC +$L$_ghash_clmul:: + lea rax,QWORD PTR[((-136))+rsp] +$L$SEH_begin_gcm_ghash_clmul:: + +DB 048h,08dh,060h,0e0h +DB 00fh,029h,070h,0e0h +DB 00fh,029h,078h,0f0h +DB 044h,00fh,029h,000h +DB 044h,00fh,029h,048h,010h +DB 044h,00fh,029h,050h,020h +DB 044h,00fh,029h,058h,030h +DB 044h,00fh,029h,060h,040h +DB 044h,00fh,029h,068h,050h +DB 044h,00fh,029h,070h,060h +DB 044h,00fh,029h,078h,070h + movdqa xmm10,XMMWORD PTR[$L$bswap_mask] + + movdqu xmm0,XMMWORD PTR[rcx] + movdqu xmm2,XMMWORD PTR[rdx] + movdqu xmm7,XMMWORD PTR[32+rdx] +DB 102,65,15,56,0,194 + + sub r9,010h + jz $L$odd_tail + + movdqu xmm6,XMMWORD PTR[16+rdx] + mov eax,DWORD PTR[((OPENSSL_ia32cap_P+4))] + cmp r9,030h + jb $L$skip4x + + and eax,71303168 + cmp eax,4194304 + je $L$skip4x + + sub r9,030h + mov rax,0A040608020C0E000h + movdqu xmm14,XMMWORD PTR[48+rdx] + movdqu xmm15,XMMWORD PTR[64+rdx] + + + + + movdqu xmm3,XMMWORD PTR[48+r8] + movdqu xmm11,XMMWORD PTR[32+r8] +DB 102,65,15,56,0,218 +DB 102,69,15,56,0,218 + movdqa xmm5,xmm3 + pshufd xmm4,xmm3,78 + pxor xmm4,xmm3 +DB 102,15,58,68,218,0 +DB 102,15,58,68,234,17 +DB 102,15,58,68,231,0 + + movdqa xmm13,xmm11 + pshufd xmm12,xmm11,78 + pxor xmm12,xmm11 +DB 102,68,15,58,68,222,0 +DB 102,68,15,58,68,238,17 +DB 102,68,15,58,68,231,16 + xorps xmm3,xmm11 + xorps xmm5,xmm13 + movups 
xmm7,XMMWORD PTR[80+rdx] + xorps xmm4,xmm12 + + movdqu xmm11,XMMWORD PTR[16+r8] + movdqu xmm8,XMMWORD PTR[r8] +DB 102,69,15,56,0,218 +DB 102,69,15,56,0,194 + movdqa xmm13,xmm11 + pshufd xmm12,xmm11,78 + pxor xmm0,xmm8 + pxor xmm12,xmm11 +DB 102,69,15,58,68,222,0 + movdqa xmm1,xmm0 + pshufd xmm8,xmm0,78 + pxor xmm8,xmm0 +DB 102,69,15,58,68,238,17 +DB 102,68,15,58,68,231,0 + xorps xmm3,xmm11 + xorps xmm5,xmm13 + + lea r8,QWORD PTR[64+r8] + sub r9,040h + jc $L$tail4x + + jmp $L$mod4_loop +ALIGN 32 +$L$mod4_loop:: +DB 102,65,15,58,68,199,0 + xorps xmm4,xmm12 + movdqu xmm11,XMMWORD PTR[48+r8] +DB 102,69,15,56,0,218 +DB 102,65,15,58,68,207,17 + xorps xmm0,xmm3 + movdqu xmm3,XMMWORD PTR[32+r8] + movdqa xmm13,xmm11 +DB 102,68,15,58,68,199,16 + pshufd xmm12,xmm11,78 + xorps xmm1,xmm5 + pxor xmm12,xmm11 +DB 102,65,15,56,0,218 + movups xmm7,XMMWORD PTR[32+rdx] + xorps xmm8,xmm4 +DB 102,68,15,58,68,218,0 + pshufd xmm4,xmm3,78 + + pxor xmm8,xmm0 + movdqa xmm5,xmm3 + pxor xmm8,xmm1 + pxor xmm4,xmm3 + movdqa xmm9,xmm8 +DB 102,68,15,58,68,234,17 + pslldq xmm8,8 + psrldq xmm9,8 + pxor xmm0,xmm8 + movdqa xmm8,XMMWORD PTR[$L$7_mask] + pxor xmm1,xmm9 +DB 102,76,15,110,200 + + pand xmm8,xmm0 +DB 102,69,15,56,0,200 + pxor xmm9,xmm0 +DB 102,68,15,58,68,231,0 + psllq xmm9,57 + movdqa xmm8,xmm9 + pslldq xmm9,8 +DB 102,15,58,68,222,0 + psrldq xmm8,8 + pxor xmm0,xmm9 + pxor xmm1,xmm8 + movdqu xmm8,XMMWORD PTR[r8] + + movdqa xmm9,xmm0 + psrlq xmm0,1 +DB 102,15,58,68,238,17 + xorps xmm3,xmm11 + movdqu xmm11,XMMWORD PTR[16+r8] +DB 102,69,15,56,0,218 +DB 102,15,58,68,231,16 + xorps xmm5,xmm13 + movups xmm7,XMMWORD PTR[80+rdx] +DB 102,69,15,56,0,194 + pxor xmm1,xmm9 + pxor xmm9,xmm0 + psrlq xmm0,5 + + movdqa xmm13,xmm11 + pxor xmm4,xmm12 + pshufd xmm12,xmm11,78 + pxor xmm0,xmm9 + pxor xmm1,xmm8 + pxor xmm12,xmm11 +DB 102,69,15,58,68,222,0 + psrlq xmm0,1 + pxor xmm0,xmm1 + movdqa xmm1,xmm0 +DB 102,69,15,58,68,238,17 + xorps xmm3,xmm11 + pshufd xmm8,xmm0,78 + pxor xmm8,xmm0 + +DB 
102,68,15,58,68,231,0 + xorps xmm5,xmm13 + + lea r8,QWORD PTR[64+r8] + sub r9,040h + jnc $L$mod4_loop + +$L$tail4x:: +DB 102,65,15,58,68,199,0 +DB 102,65,15,58,68,207,17 +DB 102,68,15,58,68,199,16 + xorps xmm4,xmm12 + xorps xmm0,xmm3 + xorps xmm1,xmm5 + pxor xmm1,xmm0 + pxor xmm8,xmm4 + + pxor xmm8,xmm1 + pxor xmm1,xmm0 + + movdqa xmm9,xmm8 + psrldq xmm8,8 + pslldq xmm9,8 + pxor xmm1,xmm8 + pxor xmm0,xmm9 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + add r9,040h + jz $L$done + movdqu xmm7,XMMWORD PTR[32+rdx] + sub r9,010h + jz $L$odd_tail +$L$skip4x:: + + + + + + movdqu xmm8,XMMWORD PTR[r8] + movdqu xmm3,XMMWORD PTR[16+r8] +DB 102,69,15,56,0,194 +DB 102,65,15,56,0,218 + pxor xmm0,xmm8 + + movdqa xmm5,xmm3 + pshufd xmm4,xmm3,78 + pxor xmm4,xmm3 +DB 102,15,58,68,218,0 +DB 102,15,58,68,234,17 +DB 102,15,58,68,231,0 + + lea r8,QWORD PTR[32+r8] + nop + sub r9,020h + jbe $L$even_tail + nop + jmp $L$mod_loop + +ALIGN 32 +$L$mod_loop:: + movdqa xmm1,xmm0 + movdqa xmm8,xmm4 + pshufd xmm4,xmm0,78 + pxor xmm4,xmm0 + +DB 102,15,58,68,198,0 +DB 102,15,58,68,206,17 +DB 102,15,58,68,231,16 + + pxor xmm0,xmm3 + pxor xmm1,xmm5 + movdqu xmm9,XMMWORD PTR[r8] + pxor xmm8,xmm0 +DB 102,69,15,56,0,202 + movdqu xmm3,XMMWORD PTR[16+r8] + + pxor xmm8,xmm1 + pxor xmm1,xmm9 + pxor xmm4,xmm8 +DB 102,65,15,56,0,218 + movdqa xmm8,xmm4 + psrldq xmm8,8 + pslldq xmm4,8 + pxor xmm1,xmm8 + pxor xmm0,xmm4 + + movdqa xmm5,xmm3 + + movdqa xmm9,xmm0 + movdqa xmm8,xmm0 + psllq xmm0,5 + pxor xmm8,xmm0 +DB 102,15,58,68,218,0 + psllq xmm0,1 + pxor xmm0,xmm8 + psllq xmm0,57 + movdqa xmm8,xmm0 + pslldq xmm0,8 + psrldq xmm8,8 + pxor xmm0,xmm9 + pshufd xmm4,xmm5,78 + pxor xmm1,xmm8 + pxor xmm4,xmm5 + + movdqa xmm9,xmm0 + 
psrlq xmm0,1 +DB 102,15,58,68,234,17 + pxor xmm1,xmm9 + pxor xmm9,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm9 + lea r8,QWORD PTR[32+r8] + psrlq xmm0,1 +DB 102,15,58,68,231,0 + pxor xmm0,xmm1 + + sub r9,020h + ja $L$mod_loop + +$L$even_tail:: + movdqa xmm1,xmm0 + movdqa xmm8,xmm4 + pshufd xmm4,xmm0,78 + pxor xmm4,xmm0 + +DB 102,15,58,68,198,0 +DB 102,15,58,68,206,17 +DB 102,15,58,68,231,16 + + pxor xmm0,xmm3 + pxor xmm1,xmm5 + pxor xmm8,xmm0 + pxor xmm8,xmm1 + pxor xmm4,xmm8 + movdqa xmm8,xmm4 + psrldq xmm8,8 + pslldq xmm4,8 + pxor xmm1,xmm8 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + test r9,r9 + jnz $L$done + +$L$odd_tail:: + movdqu xmm8,XMMWORD PTR[r8] +DB 102,69,15,56,0,194 + pxor xmm0,xmm8 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,223,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +$L$done:: +DB 102,65,15,56,0,194 + movdqu XMMWORD PTR[rcx],xmm0 + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + movaps xmm10,XMMWORD PTR[64+rsp] + movaps xmm11,XMMWORD PTR[80+rsp] + movaps xmm12,XMMWORD PTR[96+rsp] + movaps xmm13,XMMWORD PTR[112+rsp] + movaps xmm14,XMMWORD PTR[128+rsp] + movaps xmm15,XMMWORD 
PTR[144+rsp] + lea rsp,QWORD PTR[168+rsp] +$L$SEH_end_gcm_ghash_clmul:: + DB 0F3h,0C3h ;repret +gcm_ghash_clmul ENDP +PUBLIC gcm_init_avx + +ALIGN 32 +gcm_init_avx PROC PUBLIC + jmp $L$_init_clmul +gcm_init_avx ENDP +PUBLIC gcm_gmult_avx + +ALIGN 32 +gcm_gmult_avx PROC PUBLIC + jmp $L$_gmult_clmul +gcm_gmult_avx ENDP +PUBLIC gcm_ghash_avx + +ALIGN 32 +gcm_ghash_avx PROC PUBLIC + jmp $L$_ghash_clmul +gcm_ghash_avx ENDP +ALIGN 64 +$L$bswap_mask:: +DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +$L$0x1c2_polynomial:: +DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0c2h +$L$7_mask:: + DD 7,0,7,0 +$L$7_mask_poly:: + DD 7,0,450,0 +ALIGN 64 + +$L$rem_4bit:: + DD 0,0,0,471859200,0,943718400,0,610271232 + DD 0,1887436800,0,1822425088,0,1220542464,0,1423966208 + DD 0,3774873600,0,4246732800,0,3644850176,0,3311403008 + DD 0,2441084928,0,2376073216,0,2847932416,0,3051356160 + +$L$rem_8bit:: + DW 00000h,001C2h,00384h,00246h,00708h,006CAh,0048Ch,0054Eh + DW 00E10h,00FD2h,00D94h,00C56h,00918h,008DAh,00A9Ch,00B5Eh + DW 01C20h,01DE2h,01FA4h,01E66h,01B28h,01AEAh,018ACh,0196Eh + DW 01230h,013F2h,011B4h,01076h,01538h,014FAh,016BCh,0177Eh + DW 03840h,03982h,03BC4h,03A06h,03F48h,03E8Ah,03CCCh,03D0Eh + DW 03650h,03792h,035D4h,03416h,03158h,0309Ah,032DCh,0331Eh + DW 02460h,025A2h,027E4h,02626h,02368h,022AAh,020ECh,0212Eh + DW 02A70h,02BB2h,029F4h,02836h,02D78h,02CBAh,02EFCh,02F3Eh + DW 07080h,07142h,07304h,072C6h,07788h,0764Ah,0740Ch,075CEh + DW 07E90h,07F52h,07D14h,07CD6h,07998h,0785Ah,07A1Ch,07BDEh + DW 06CA0h,06D62h,06F24h,06EE6h,06BA8h,06A6Ah,0682Ch,069EEh + DW 062B0h,06372h,06134h,060F6h,065B8h,0647Ah,0663Ch,067FEh + DW 048C0h,04902h,04B44h,04A86h,04FC8h,04E0Ah,04C4Ch,04D8Eh + DW 046D0h,04712h,04554h,04496h,041D8h,0401Ah,0425Ch,0439Eh + DW 054E0h,05522h,05764h,056A6h,053E8h,0522Ah,0506Ch,051AEh + DW 05AF0h,05B32h,05974h,058B6h,05DF8h,05C3Ah,05E7Ch,05FBEh + DW 0E100h,0E0C2h,0E284h,0E346h,0E608h,0E7CAh,0E58Ch,0E44Eh + DW 0EF10h,0EED2h,0EC94h,0ED56h,0E818h,0E9DAh,0EB9Ch,0EA5Eh + DW 
0FD20h,0FCE2h,0FEA4h,0FF66h,0FA28h,0FBEAh,0F9ACh,0F86Eh + DW 0F330h,0F2F2h,0F0B4h,0F176h,0F438h,0F5FAh,0F7BCh,0F67Eh + DW 0D940h,0D882h,0DAC4h,0DB06h,0DE48h,0DF8Ah,0DDCCh,0DC0Eh + DW 0D750h,0D692h,0D4D4h,0D516h,0D058h,0D19Ah,0D3DCh,0D21Eh + DW 0C560h,0C4A2h,0C6E4h,0C726h,0C268h,0C3AAh,0C1ECh,0C02Eh + DW 0CB70h,0CAB2h,0C8F4h,0C936h,0CC78h,0CDBAh,0CFFCh,0CE3Eh + DW 09180h,09042h,09204h,093C6h,09688h,0974Ah,0950Ch,094CEh + DW 09F90h,09E52h,09C14h,09DD6h,09898h,0995Ah,09B1Ch,09ADEh + DW 08DA0h,08C62h,08E24h,08FE6h,08AA8h,08B6Ah,0892Ch,088EEh + DW 083B0h,08272h,08034h,081F6h,084B8h,0857Ah,0873Ch,086FEh + DW 0A9C0h,0A802h,0AA44h,0AB86h,0AEC8h,0AF0Ah,0AD4Ch,0AC8Eh + DW 0A7D0h,0A612h,0A454h,0A596h,0A0D8h,0A11Ah,0A35Ch,0A29Eh + DW 0B5E0h,0B422h,0B664h,0B7A6h,0B2E8h,0B32Ah,0B16Ch,0B0AEh + DW 0BBF0h,0BA32h,0B874h,0B9B6h,0BCF8h,0BD3Ah,0BF7Ch,0BEBEh + +DB 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52 +DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 +DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 +DB 114,103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + lea rax,QWORD PTR[24+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 
0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_gcm_gmult_4bit + DD imagerel $L$SEH_end_gcm_gmult_4bit + DD imagerel $L$SEH_info_gcm_gmult_4bit + + DD imagerel $L$SEH_begin_gcm_ghash_4bit + DD imagerel $L$SEH_end_gcm_ghash_4bit + DD imagerel $L$SEH_info_gcm_ghash_4bit + + DD imagerel $L$SEH_begin_gcm_init_clmul + DD imagerel $L$SEH_end_gcm_init_clmul + DD imagerel $L$SEH_info_gcm_init_clmul + + DD imagerel $L$SEH_begin_gcm_ghash_clmul + DD imagerel $L$SEH_end_gcm_ghash_clmul + DD imagerel $L$SEH_info_gcm_ghash_clmul +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_gcm_gmult_4bit:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$gmult_prologue,imagerel $L$gmult_epilogue +$L$SEH_info_gcm_ghash_4bit:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$ghash_prologue,imagerel $L$ghash_epilogue +$L$SEH_info_gcm_init_clmul:: +DB 001h,008h,003h,000h +DB 008h,068h,000h,000h +DB 004h,022h,000h,000h +$L$SEH_info_gcm_ghash_clmul:: +DB 001h,033h,016h,000h +DB 033h,0f8h,009h,000h +DB 02eh,0e8h,008h,000h +DB 029h,0d8h,007h,000h +DB 024h,0c8h,006h,000h +DB 01fh,0b8h,005h,000h +DB 01ah,0a8h,004h,000h +DB 015h,098h,003h,000h +DB 010h,088h,002h,000h +DB 00ch,078h,001h,000h +DB 008h,068h,000h,000h +DB 004h,001h,015h,000h + +.xdata ENDS +END diff --git a/win-x86_64/crypto/rc4/rc4-md5-x86_64.asm b/win-x86_64/crypto/rc4/rc4-md5-x86_64.asm new file mode 100644 index 0000000..9d823ae --- /dev/null +++ 
b/win-x86_64/crypto/rc4/rc4-md5-x86_64.asm @@ -0,0 +1,1374 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' +ALIGN 16 + +PUBLIC rc4_md5_enc + +rc4_md5_enc PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rc4_md5_enc:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + cmp r9,0 + je $L$abort + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + sub rsp,40 +$L$body:: + mov r11,rcx + mov r12,r9 + mov r13,rsi + mov r14,rdx + mov r15,r8 + xor rbp,rbp + xor rcx,rcx + + lea rdi,QWORD PTR[8+rdi] + mov bpl,BYTE PTR[((-8))+rdi] + mov cl,BYTE PTR[((-4))+rdi] + + inc bpl + sub r14,r13 + mov eax,DWORD PTR[rbp*4+rdi] + add cl,al + lea rsi,QWORD PTR[rbp*4+rdi] + shl r12,6 + add r12,r15 + mov QWORD PTR[16+rsp],r12 + + mov QWORD PTR[24+rsp],r11 + mov r8d,DWORD PTR[r11] + mov r9d,DWORD PTR[4+r11] + mov r10d,DWORD PTR[8+r11] + mov r11d,DWORD PTR[12+r11] + jmp $L$oop + +ALIGN 16 +$L$oop:: + mov DWORD PTR[rsp],r8d + mov DWORD PTR[4+rsp],r9d + mov DWORD PTR[8+rsp],r10d + mov r12d,r11d + mov DWORD PTR[12+rsp],r11d + pxor xmm0,xmm0 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD PTR[r15] + add al,dl + mov ebx,DWORD PTR[4+rsi] + add r8d,3614090360 + xor r12d,r11d + movzx eax,al + mov DWORD PTR[rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + movd xmm0,DWORD PTR[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD PTR[4+r15] + add bl,dl + mov eax,DWORD PTR[8+rsi] + add r11d,3905402710 + xor r12d,r10d + movzx ebx,bl + mov DWORD PTR[4+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + movd xmm1,DWORD PTR[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD PTR[8+r15] + add 
al,dl + mov ebx,DWORD PTR[12+rsi] + add r10d,606105819 + xor r12d,r9d + movzx eax,al + mov DWORD PTR[8+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD PTR[12+r15] + add bl,dl + mov eax,DWORD PTR[16+rsi] + add r9d,3250441966 + xor r12d,r8d + movzx ebx,bl + mov DWORD PTR[12+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD PTR[16+r15] + add al,dl + mov ebx,DWORD PTR[20+rsi] + add r8d,4118548399 + xor r12d,r11d + movzx eax,al + mov DWORD PTR[16+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD PTR[20+r15] + add bl,dl + mov eax,DWORD PTR[24+rsi] + add r11d,1200080426 + xor r12d,r10d + movzx ebx,bl + mov DWORD PTR[20+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD PTR[24+r15] + add al,dl + mov ebx,DWORD PTR[28+rsi] + add r10d,2821735955 + xor r12d,r9d + movzx eax,al + mov DWORD PTR[24+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD PTR[28+r15] + add bl,dl + mov eax,DWORD PTR[32+rsi] + add r9d,4249261313 + xor r12d,r8d + movzx ebx,bl + mov DWORD PTR[28+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD 
PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD PTR[32+r15] + add al,dl + mov ebx,DWORD PTR[36+rsi] + add r8d,1770035416 + xor r12d,r11d + movzx eax,al + mov DWORD PTR[32+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD PTR[36+r15] + add bl,dl + mov eax,DWORD PTR[40+rsi] + add r11d,2336552879 + xor r12d,r10d + movzx ebx,bl + mov DWORD PTR[36+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD PTR[40+r15] + add al,dl + mov ebx,DWORD PTR[44+rsi] + add r10d,4294925233 + xor r12d,r9d + movzx eax,al + mov DWORD PTR[40+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD PTR[44+r15] + add bl,dl + mov eax,DWORD PTR[48+rsi] + add r9d,2304563134 + xor r12d,r8d + movzx ebx,bl + mov DWORD PTR[44+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD PTR[48+r15] + add al,dl + mov ebx,DWORD PTR[52+rsi] + add r8d,1804603682 + xor r12d,r11d + movzx eax,al + mov DWORD PTR[48+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD PTR[52+r15] + add bl,dl + mov eax,DWORD PTR[56+rsi] + add r11d,4254626195 + xor r12d,r10d + movzx ebx,bl + mov DWORD PTR[52+rsi],edx + add r11d,r12d + add 
cl,al + rol r11d,12 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD PTR[56+r15] + add al,dl + mov ebx,DWORD PTR[60+rsi] + add r10d,2792965006 + xor r12d,r9d + movzx eax,al + mov DWORD PTR[56+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm2,XMMWORD PTR[r13] + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD PTR[60+r15] + add bl,dl + mov eax,DWORD PTR[64+rsi] + add r9d,1236535329 + xor r12d,r8d + movzx ebx,bl + mov DWORD PTR[60+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r10d + pinsrw xmm1,WORD PTR[rbx*4+rdi],7 + + add r9d,r10d + psllq xmm1,8 + pxor xmm2,xmm0 + pxor xmm2,xmm1 + pxor xmm0,xmm0 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD PTR[4+r15] + add al,dl + mov ebx,DWORD PTR[68+rsi] + add r8d,4129170786 + xor r12d,r10d + movzx eax,al + mov DWORD PTR[64+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + movd xmm0,DWORD PTR[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD PTR[24+r15] + add bl,dl + mov eax,DWORD PTR[72+rsi] + add r11d,3225465664 + xor r12d,r9d + movzx ebx,bl + mov DWORD PTR[68+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + movd xmm1,DWORD PTR[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD PTR[44+r15] + add al,dl + mov ebx,DWORD PTR[76+rsi] + add r10d,643717713 + xor r12d,r8d + movzx eax,al + mov DWORD PTR[72+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD PTR[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + 
mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD PTR[r15] + add bl,dl + mov eax,DWORD PTR[80+rsi] + add r9d,3921069994 + xor r12d,r11d + movzx ebx,bl + mov DWORD PTR[76+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r10d + pinsrw xmm1,WORD PTR[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD PTR[20+r15] + add al,dl + mov ebx,DWORD PTR[84+rsi] + add r8d,3593408605 + xor r12d,r10d + movzx eax,al + mov DWORD PTR[80+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + pinsrw xmm0,WORD PTR[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD PTR[40+r15] + add bl,dl + mov eax,DWORD PTR[88+rsi] + add r11d,38016083 + xor r12d,r9d + movzx ebx,bl + mov DWORD PTR[84+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + pinsrw xmm1,WORD PTR[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD PTR[60+r15] + add al,dl + mov ebx,DWORD PTR[92+rsi] + add r10d,3634488961 + xor r12d,r8d + movzx eax,al + mov DWORD PTR[88+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD PTR[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD PTR[16+r15] + add bl,dl + mov eax,DWORD PTR[96+rsi] + add r9d,3889429448 + xor r12d,r11d + movzx ebx,bl + mov DWORD PTR[92+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r10d + pinsrw xmm1,WORD PTR[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD PTR[36+r15] + add al,dl + mov ebx,DWORD PTR[100+rsi] + add r8d,568446438 + xor r12d,r10d + movzx eax,al + mov DWORD PTR[96+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + 
pinsrw xmm0,WORD PTR[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD PTR[56+r15] + add bl,dl + mov eax,DWORD PTR[104+rsi] + add r11d,3275163606 + xor r12d,r9d + movzx ebx,bl + mov DWORD PTR[100+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + pinsrw xmm1,WORD PTR[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD PTR[12+r15] + add al,dl + mov ebx,DWORD PTR[108+rsi] + add r10d,4107603335 + xor r12d,r8d + movzx eax,al + mov DWORD PTR[104+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD PTR[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD PTR[32+r15] + add bl,dl + mov eax,DWORD PTR[112+rsi] + add r9d,1163531501 + xor r12d,r11d + movzx ebx,bl + mov DWORD PTR[108+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r10d + pinsrw xmm1,WORD PTR[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD PTR[52+r15] + add al,dl + mov ebx,DWORD PTR[116+rsi] + add r8d,2850285829 + xor r12d,r10d + movzx eax,al + mov DWORD PTR[112+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + pinsrw xmm0,WORD PTR[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD PTR[8+r15] + add bl,dl + mov eax,DWORD PTR[120+rsi] + add r11d,4243563512 + xor r12d,r9d + movzx ebx,bl + mov DWORD PTR[116+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + pinsrw xmm1,WORD PTR[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD PTR[28+r15] + add al,dl + mov ebx,DWORD PTR[124+rsi] + add r10d,1735328473 + 
xor r12d,r8d + movzx eax,al + mov DWORD PTR[120+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD PTR[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm3,XMMWORD PTR[16+r13] + add bpl,32 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD PTR[48+r15] + add bl,dl + mov eax,DWORD PTR[rbp*4+rdi] + add r9d,2368359562 + xor r12d,r11d + movzx ebx,bl + mov DWORD PTR[124+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],7 + + add r9d,r10d + mov rsi,rcx + xor rcx,rcx + mov cl,sil + lea rsi,QWORD PTR[rbp*4+rdi] + psllq xmm1,8 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + pxor xmm0,xmm0 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD PTR[20+r15] + add al,dl + mov ebx,DWORD PTR[4+rsi] + add r8d,4294588738 + movzx eax,al + add r8d,r12d + mov DWORD PTR[rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + movd xmm0,DWORD PTR[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD PTR[32+r15] + add bl,dl + mov eax,DWORD PTR[8+rsi] + add r11d,2272392833 + movzx ebx,bl + add r11d,r12d + mov DWORD PTR[4+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + movd xmm1,DWORD PTR[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD PTR[44+r15] + add al,dl + mov ebx,DWORD PTR[12+rsi] + add r10d,1839030562 + movzx eax,al + add r10d,r12d + mov DWORD PTR[8+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD PTR[56+r15] + add bl,dl + mov eax,DWORD PTR[16+rsi] + add r9d,4259657740 + movzx ebx,bl + add r9d,r12d + mov DWORD PTR[12+rsi],edx + add cl,al + rol r9d,23 + mov r12d,r11d 
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD PTR[4+r15] + add al,dl + mov ebx,DWORD PTR[20+rsi] + add r8d,2763975236 + movzx eax,al + add r8d,r12d + mov DWORD PTR[16+rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD PTR[16+r15] + add bl,dl + mov eax,DWORD PTR[24+rsi] + add r11d,1272893353 + movzx ebx,bl + add r11d,r12d + mov DWORD PTR[20+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD PTR[28+r15] + add al,dl + mov ebx,DWORD PTR[28+rsi] + add r10d,4139469664 + movzx eax,al + add r10d,r12d + mov DWORD PTR[24+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD PTR[40+r15] + add bl,dl + mov eax,DWORD PTR[32+rsi] + add r9d,3200236656 + movzx ebx,bl + add r9d,r12d + mov DWORD PTR[28+rsi],edx + add cl,al + rol r9d,23 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD PTR[52+r15] + add al,dl + mov ebx,DWORD PTR[36+rsi] + add r8d,681279174 + movzx eax,al + add r8d,r12d + mov DWORD PTR[32+rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD PTR[r15] + add bl,dl + mov eax,DWORD PTR[40+rsi] + add r11d,3936430074 + movzx ebx,bl + add r11d,r12d + mov DWORD PTR[36+rsi],edx + add cl,al + rol r11d,11 + mov 
r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD PTR[12+r15] + add al,dl + mov ebx,DWORD PTR[44+rsi] + add r10d,3572445317 + movzx eax,al + add r10d,r12d + mov DWORD PTR[40+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD PTR[24+r15] + add bl,dl + mov eax,DWORD PTR[48+rsi] + add r9d,76029189 + movzx ebx,bl + add r9d,r12d + mov DWORD PTR[44+rsi],edx + add cl,al + rol r9d,23 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD PTR[36+r15] + add al,dl + mov ebx,DWORD PTR[52+rsi] + add r8d,3654602809 + movzx eax,al + add r8d,r12d + mov DWORD PTR[48+rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD PTR[48+r15] + add bl,dl + mov eax,DWORD PTR[56+rsi] + add r11d,3873151461 + movzx ebx,bl + add r11d,r12d + mov DWORD PTR[52+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD PTR[60+r15] + add al,dl + mov ebx,DWORD PTR[60+rsi] + add r10d,530742520 + movzx eax,al + add r10d,r12d + mov DWORD PTR[56+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm4,XMMWORD PTR[32+r13] + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD PTR[8+r15] + add bl,dl + mov eax,DWORD PTR[64+rsi] + add r9d,3299628645 + movzx ebx,bl + add r9d,r12d + mov DWORD 
PTR[60+rsi],edx + add cl,al + rol r9d,23 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],7 + + add r9d,r10d + psllq xmm1,8 + pxor xmm4,xmm0 + pxor xmm4,xmm1 + pxor xmm0,xmm0 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD PTR[r15] + add al,dl + mov ebx,DWORD PTR[68+rsi] + add r8d,4096336452 + movzx eax,al + xor r12d,r10d + mov DWORD PTR[64+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + movd xmm0,DWORD PTR[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD PTR[28+r15] + add bl,dl + mov eax,DWORD PTR[72+rsi] + add r11d,1126891415 + movzx ebx,bl + xor r12d,r9d + mov DWORD PTR[68+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + movd xmm1,DWORD PTR[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD PTR[56+r15] + add al,dl + mov ebx,DWORD PTR[76+rsi] + add r10d,2878612391 + movzx eax,al + xor r12d,r8d + mov DWORD PTR[72+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD PTR[20+r15] + add bl,dl + mov eax,DWORD PTR[80+rsi] + add r9d,4237533241 + movzx ebx,bl + xor r12d,r11d + mov DWORD PTR[76+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD PTR[48+r15] + add al,dl + mov ebx,DWORD PTR[84+rsi] + add r8d,1700485571 + movzx eax,al + xor r12d,r10d + mov DWORD PTR[80+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + 
or r12d,r8d + add r11d,DWORD PTR[12+r15] + add bl,dl + mov eax,DWORD PTR[88+rsi] + add r11d,2399980690 + movzx ebx,bl + xor r12d,r9d + mov DWORD PTR[84+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD PTR[40+r15] + add al,dl + mov ebx,DWORD PTR[92+rsi] + add r10d,4293915773 + movzx eax,al + xor r12d,r8d + mov DWORD PTR[88+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD PTR[4+r15] + add bl,dl + mov eax,DWORD PTR[96+rsi] + add r9d,2240044497 + movzx ebx,bl + xor r12d,r11d + mov DWORD PTR[92+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD PTR[32+r15] + add al,dl + mov ebx,DWORD PTR[100+rsi] + add r8d,1873313359 + movzx eax,al + xor r12d,r10d + mov DWORD PTR[96+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD PTR[60+r15] + add bl,dl + mov eax,DWORD PTR[104+rsi] + add r11d,4264355552 + movzx ebx,bl + xor r12d,r9d + mov DWORD PTR[100+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD PTR[24+r15] + add al,dl + mov ebx,DWORD PTR[108+rsi] + add r10d,2734768916 + movzx eax,al + xor r12d,r8d + mov DWORD PTR[104+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],5 
+ + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD PTR[52+r15] + add bl,dl + mov eax,DWORD PTR[112+rsi] + add r9d,1309151649 + movzx ebx,bl + xor r12d,r11d + mov DWORD PTR[108+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD PTR[16+r15] + add al,dl + mov ebx,DWORD PTR[116+rsi] + add r8d,4149444226 + movzx eax,al + xor r12d,r10d + mov DWORD PTR[112+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD PTR[44+r15] + add bl,dl + mov eax,DWORD PTR[120+rsi] + add r11d,3174756917 + movzx ebx,bl + xor r12d,r9d + mov DWORD PTR[116+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD PTR[8+r15] + add al,dl + mov ebx,DWORD PTR[124+rsi] + add r10d,718787259 + movzx eax,al + xor r12d,r8d + mov DWORD PTR[120+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm5,XMMWORD PTR[48+r13] + add bpl,32 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD PTR[36+r15] + add bl,dl + mov eax,DWORD PTR[rbp*4+rdi] + add r9d,3951481745 + movzx ebx,bl + xor r12d,r11d + mov DWORD PTR[124+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],7 + + add r9d,r10d + mov rsi,rbp + xor rbp,rbp + mov bpl,sil + mov rsi,rcx + xor rcx,rcx + mov cl,sil + lea rsi,QWORD PTR[rbp*4+rdi] + psllq xmm1,8 + pxor xmm5,xmm0 + pxor xmm5,xmm1 + add r8d,DWORD 
PTR[rsp] + add r9d,DWORD PTR[4+rsp] + add r10d,DWORD PTR[8+rsp] + add r11d,DWORD PTR[12+rsp] + + movdqu XMMWORD PTR[r13*1+r14],xmm2 + movdqu XMMWORD PTR[16+r13*1+r14],xmm3 + movdqu XMMWORD PTR[32+r13*1+r14],xmm4 + movdqu XMMWORD PTR[48+r13*1+r14],xmm5 + lea r15,QWORD PTR[64+r15] + lea r13,QWORD PTR[64+r13] + cmp r15,QWORD PTR[16+rsp] + jb $L$oop + + mov r12,QWORD PTR[24+rsp] + sub cl,al + mov DWORD PTR[r12],r8d + mov DWORD PTR[4+r12],r9d + mov DWORD PTR[8+r12],r10d + mov DWORD PTR[12+r12],r11d + sub bpl,1 + mov DWORD PTR[((-8))+rdi],ebp + mov DWORD PTR[((-4))+rdi],ecx + + mov r15,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r13,QWORD PTR[56+rsp] + mov r12,QWORD PTR[64+rsp] + mov rbp,QWORD PTR[72+rsp] + mov rbx,QWORD PTR[80+rsp] + lea rsp,QWORD PTR[88+rsp] +$L$epilogue:: +$L$abort:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rc4_md5_enc:: +rc4_md5_enc ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$body] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jae $L$in_prologue + + mov r15,QWORD PTR[40+rax] + mov r14,QWORD PTR[48+rax] + mov r13,QWORD PTR[56+rax] + mov r12,QWORD PTR[64+rax] + mov rbp,QWORD PTR[72+rax] + mov rbx,QWORD PTR[80+rax] + lea rax,QWORD PTR[88+rax] + + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] 
+ mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_rc4_md5_enc + DD imagerel $L$SEH_end_rc4_md5_enc + DD imagerel $L$SEH_info_rc4_md5_enc + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_rc4_md5_enc:: +DB 9,0,0,0 + DD imagerel se_handler + +.xdata ENDS +END diff --git a/win-x86_64/crypto/rc4/rc4-x86_64.asm b/win-x86_64/crypto/rc4/rc4-x86_64.asm new file mode 100644 index 0000000..c183cac --- /dev/null +++ b/win-x86_64/crypto/rc4/rc4-x86_64.asm @@ -0,0 +1,773 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC asm_RC4 + +ALIGN 16 +asm_RC4 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_asm_RC4:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + or rsi,rsi + jne $L$entry + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$entry:: + push rbx + push r12 + push r13 +$L$prologue:: + mov r11,rsi + mov r12,rdx + mov r13,rcx + xor r10,r10 + xor rcx,rcx + + lea rdi,QWORD PTR[8+rdi] + mov r10b,BYTE PTR[((-8))+rdi] + mov cl,BYTE PTR[((-4))+rdi] + cmp DWORD PTR[256+rdi],-1 + je $L$RC4_CHAR + mov r8d,DWORD PTR[OPENSSL_ia32cap_P] + xor rbx,rbx + inc r10b + sub rbx,r10 + sub r13,r12 + mov eax,DWORD PTR[r10*4+rdi] + test r11,-16 + jz $L$loop1 + bt r8d,30 + jc $L$intel + and rbx,7 + lea rsi,QWORD PTR[1+r10] + jz $L$oop8 + sub r11,rbx +$L$oop8_warmup:: + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov DWORD 
PTR[r10*4+rdi],edx + add al,dl + inc r10b + mov edx,DWORD PTR[rax*4+rdi] + mov eax,DWORD PTR[r10*4+rdi] + xor dl,BYTE PTR[r12] + mov BYTE PTR[r13*1+r12],dl + lea r12,QWORD PTR[1+r12] + dec rbx + jnz $L$oop8_warmup + + lea rsi,QWORD PTR[1+r10] + jmp $L$oop8 +ALIGN 16 +$L$oop8:: + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov ebx,DWORD PTR[rsi*4+rdi] + ror r8,8 + mov DWORD PTR[r10*4+rdi],edx + add dl,al + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,bl + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + mov eax,DWORD PTR[4+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[4+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov ebx,DWORD PTR[8+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[8+r10*4+rdi],edx + add dl,al + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,bl + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + mov eax,DWORD PTR[12+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[12+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov ebx,DWORD PTR[16+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[16+r10*4+rdi],edx + add dl,al + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,bl + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + mov eax,DWORD PTR[20+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[20+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov ebx,DWORD PTR[24+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[24+r10*4+rdi],edx + add dl,al + mov r8b,BYTE PTR[rdx*4+rdi] + add sil,8 + add cl,bl + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + mov eax,DWORD PTR[((-4))+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[28+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE PTR[rdx*4+rdi] + add r10b,8 + ror r8,8 + sub r11,8 + + xor r8,QWORD PTR[r12] + mov QWORD PTR[r13*1+r12],r8 + lea r12,QWORD PTR[8+r12] + + test r11,-8 + jnz $L$oop8 + cmp 
r11,0 + jne $L$loop1 + jmp $L$exit + +ALIGN 16 +$L$intel:: + test r11,-32 + jz $L$loop1 + and rbx,15 + jz $L$oop16_is_hot + sub r11,rbx +$L$oop16_warmup:: + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov DWORD PTR[r10*4+rdi],edx + add al,dl + inc r10b + mov edx,DWORD PTR[rax*4+rdi] + mov eax,DWORD PTR[r10*4+rdi] + xor dl,BYTE PTR[r12] + mov BYTE PTR[r13*1+r12],dl + lea r12,QWORD PTR[1+r12] + dec rbx + jnz $L$oop16_warmup + + mov rbx,rcx + xor rcx,rcx + mov cl,bl + +$L$oop16_is_hot:: + lea rsi,QWORD PTR[r10*4+rdi] + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + pxor xmm0,xmm0 + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[4+rsi] + movzx eax,al + mov DWORD PTR[rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],0 + jmp $L$oop16_enter +ALIGN 16 +$L$oop16:: + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + pxor xmm2,xmm0 + psllq xmm1,8 + pxor xmm0,xmm0 + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[4+rsi] + movzx eax,al + mov DWORD PTR[rsi],edx + pxor xmm2,xmm1 + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],0 + movdqu XMMWORD PTR[r13*1+r12],xmm2 + lea r12,QWORD PTR[16+r12] +$L$oop16_enter:: + mov edx,DWORD PTR[rcx*4+rdi] + pxor xmm1,xmm1 + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[8+rsi] + movzx ebx,bl + mov DWORD PTR[4+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],0 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[12+rsi] + movzx eax,al + mov DWORD PTR[8+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],1 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[16+rsi] + movzx ebx,bl + mov DWORD PTR[12+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],1 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[20+rsi] + movzx eax,al + mov DWORD PTR[16+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],2 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD 
PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[24+rsi] + movzx ebx,bl + mov DWORD PTR[20+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],2 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[28+rsi] + movzx eax,al + mov DWORD PTR[24+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],3 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[32+rsi] + movzx ebx,bl + mov DWORD PTR[28+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],3 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[36+rsi] + movzx eax,al + mov DWORD PTR[32+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],4 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[40+rsi] + movzx ebx,bl + mov DWORD PTR[36+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],4 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[44+rsi] + movzx eax,al + mov DWORD PTR[40+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],5 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[48+rsi] + movzx ebx,bl + mov DWORD PTR[44+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],5 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[52+rsi] + movzx eax,al + mov DWORD PTR[48+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],6 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[56+rsi] + movzx ebx,bl + mov DWORD PTR[52+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],6 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[60+rsi] + movzx eax,al + mov DWORD PTR[56+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],7 + add r10b,16 + movdqu xmm2,XMMWORD PTR[r12] + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add 
bl,dl + movzx ebx,bl + mov DWORD PTR[60+rsi],edx + lea rsi,QWORD PTR[r10*4+rdi] + pinsrw xmm1,WORD PTR[rbx*4+rdi],7 + mov eax,DWORD PTR[rsi] + mov rbx,rcx + xor rcx,rcx + sub r11,16 + mov cl,bl + test r11,-16 + jnz $L$oop16 + + psllq xmm1,8 + pxor xmm2,xmm0 + pxor xmm2,xmm1 + movdqu XMMWORD PTR[r13*1+r12],xmm2 + lea r12,QWORD PTR[16+r12] + + cmp r11,0 + jne $L$loop1 + jmp $L$exit + +ALIGN 16 +$L$loop1:: + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov DWORD PTR[r10*4+rdi],edx + add al,dl + inc r10b + mov edx,DWORD PTR[rax*4+rdi] + mov eax,DWORD PTR[r10*4+rdi] + xor dl,BYTE PTR[r12] + mov BYTE PTR[r13*1+r12],dl + lea r12,QWORD PTR[1+r12] + dec r11 + jnz $L$loop1 + jmp $L$exit + +ALIGN 16 +$L$RC4_CHAR:: + add r10b,1 + movzx eax,BYTE PTR[r10*1+rdi] + test r11,-8 + jz $L$cloop1 + jmp $L$cloop8 +ALIGN 16 +$L$cloop8:: + mov r8d,DWORD PTR[r12] + mov r9d,DWORD PTR[4+r12] + add cl,al + lea rsi,QWORD PTR[1+r10] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE PTR[rsi*1+rdi] + mov BYTE PTR[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE PTR[r10*1+rdi],dl + jne $L$cmov0 + mov rbx,rax +$L$cmov0:: + add dl,al + xor r8b,BYTE PTR[rdx*1+rdi] + ror r8d,8 + add cl,bl + lea r10,QWORD PTR[1+rsi] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE PTR[r10*1+rdi] + mov BYTE PTR[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE PTR[rsi*1+rdi],dl + jne $L$cmov1 + mov rax,rbx +$L$cmov1:: + add dl,bl + xor r8b,BYTE PTR[rdx*1+rdi] + ror r8d,8 + add cl,al + lea rsi,QWORD PTR[1+r10] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE PTR[rsi*1+rdi] + mov BYTE PTR[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE PTR[r10*1+rdi],dl + jne $L$cmov2 + mov rbx,rax +$L$cmov2:: + add dl,al + xor r8b,BYTE PTR[rdx*1+rdi] + ror r8d,8 + add cl,bl + lea r10,QWORD PTR[1+rsi] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE PTR[r10*1+rdi] + mov BYTE PTR[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE PTR[rsi*1+rdi],dl + jne $L$cmov3 + mov rax,rbx 
+$L$cmov3:: + add dl,bl + xor r8b,BYTE PTR[rdx*1+rdi] + ror r8d,8 + add cl,al + lea rsi,QWORD PTR[1+r10] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE PTR[rsi*1+rdi] + mov BYTE PTR[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE PTR[r10*1+rdi],dl + jne $L$cmov4 + mov rbx,rax +$L$cmov4:: + add dl,al + xor r9b,BYTE PTR[rdx*1+rdi] + ror r9d,8 + add cl,bl + lea r10,QWORD PTR[1+rsi] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE PTR[r10*1+rdi] + mov BYTE PTR[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE PTR[rsi*1+rdi],dl + jne $L$cmov5 + mov rax,rbx +$L$cmov5:: + add dl,bl + xor r9b,BYTE PTR[rdx*1+rdi] + ror r9d,8 + add cl,al + lea rsi,QWORD PTR[1+r10] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE PTR[rsi*1+rdi] + mov BYTE PTR[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE PTR[r10*1+rdi],dl + jne $L$cmov6 + mov rbx,rax +$L$cmov6:: + add dl,al + xor r9b,BYTE PTR[rdx*1+rdi] + ror r9d,8 + add cl,bl + lea r10,QWORD PTR[1+rsi] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE PTR[r10*1+rdi] + mov BYTE PTR[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE PTR[rsi*1+rdi],dl + jne $L$cmov7 + mov rax,rbx +$L$cmov7:: + add dl,bl + xor r9b,BYTE PTR[rdx*1+rdi] + ror r9d,8 + lea r11,QWORD PTR[((-8))+r11] + mov DWORD PTR[r13],r8d + lea r12,QWORD PTR[8+r12] + mov DWORD PTR[4+r13],r9d + lea r13,QWORD PTR[8+r13] + + test r11,-8 + jnz $L$cloop8 + cmp r11,0 + jne $L$cloop1 + jmp $L$exit +ALIGN 16 +$L$cloop1:: + add cl,al + movzx ecx,cl + movzx edx,BYTE PTR[rcx*1+rdi] + mov BYTE PTR[rcx*1+rdi],al + mov BYTE PTR[r10*1+rdi],dl + add dl,al + add r10b,1 + movzx edx,dl + movzx r10d,r10b + movzx edx,BYTE PTR[rdx*1+rdi] + movzx eax,BYTE PTR[r10*1+rdi] + xor dl,BYTE PTR[r12] + lea r12,QWORD PTR[1+r12] + mov BYTE PTR[r13],dl + lea r13,QWORD PTR[1+r13] + sub r11,1 + jnz $L$cloop1 + jmp $L$exit + +ALIGN 16 +$L$exit:: + sub r10b,1 + mov DWORD PTR[((-8))+rdi],r10d + mov DWORD PTR[((-4))+rdi],ecx + + mov r13,QWORD PTR[rsp] + mov r12,QWORD PTR[8+rsp] + mov 
rbx,QWORD PTR[16+rsp] + add rsp,24 +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_asm_RC4:: +asm_RC4 ENDP +PUBLIC asm_RC4_set_key + +ALIGN 16 +asm_RC4_set_key PROC PUBLIC ;in: rcx=key struct, rdx=key length, r8=key bytes + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_asm_RC4_set_key:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rdi,QWORD PTR[8+rdi] ;rdi = table base, 8 bytes into the struct + lea rdx,QWORD PTR[rsi*1+rdx] ;rdx = one past the last key byte + neg rsi ;rsi = negative key index, counts up to 0 + mov rcx,rsi ;rcx = rewind value for the key index + xor eax,eax + xor r9,r9 + xor r10,r10 + xor r11,r11 + + mov r8d,DWORD PTR[OPENSSL_ia32cap_P] + bt r8d,20 ;capability bit 20 selects the byte-wide table + jc $L$c1stloop + jmp $L$w1stloop + +ALIGN 16 +$L$w1stloop:: ;dword table: S[i] = i for i = 0..255 + mov DWORD PTR[rax*4+rdi],eax + add al,1 + jnc $L$w1stloop ;al wrapping to 0 sets CF and ends the loop + + xor r9,r9 + xor r8,r8 +ALIGN 16 +$L$w2ndloop:: ;j advances by key[k] and S[i], then S[i] and S[j] swap + mov r10d,DWORD PTR[r9*4+rdi] + add r8b,BYTE PTR[rsi*1+rdx] + add r8b,r10b + add rsi,1 + mov r11d,DWORD PTR[r8*4+rdi] + cmovz rsi,rcx ;ZF is still from add rsi,1: rewind to the first key byte + mov DWORD PTR[r8*4+rdi],r10d + mov DWORD PTR[r9*4+rdi],r11d + add r9b,1 + jnc $L$w2ndloop + jmp $L$exit_key + +ALIGN 16 +$L$c1stloop:: ;byte table variant of the same two loops + mov BYTE PTR[rax*1+rdi],al + add al,1 + jnc $L$c1stloop + + xor r9,r9 + xor r8,r8 +ALIGN 16 +$L$c2ndloop:: + mov r10b,BYTE PTR[r9*1+rdi] + add r8b,BYTE PTR[rsi*1+rdx] + add r8b,r10b + add rsi,1 + mov r11b,BYTE PTR[r8*1+rdi] + jnz $L$cnowrap + mov rsi,rcx +$L$cnowrap:: + mov BYTE PTR[r8*1+rdi],r10b + mov BYTE PTR[r9*1+rdi],r11b + add r9b,1 + jnc $L$c2ndloop + mov DWORD PTR[256+rdi],-1 ;marker that asm_RC4 tests to take its byte-table path + +ALIGN 16 +$L$exit_key:: + xor eax,eax + mov DWORD PTR[((-8))+rdi],eax ;zero the two dwords that precede the table + mov DWORD PTR[((-4))+rdi],eax + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_asm_RC4_set_key:: +asm_RC4_set_key ENDP + +PUBLIC RC4_options + +ALIGN 16 +RC4_options PROC PUBLIC ;out: rax = pointer to one of the strings at $L$opts + lea rax,QWORD PTR[$L$opts] + ;OPENSSL_ia32cap_P is the capability vector itself, not a pointer to it: read word 0 directly + mov edx,DWORD PTR[OPENSSL_ia32cap_P] + bt edx,20 + jc $L$8xchar + bt edx,30 + jnc $L$done ;neither bit set: keep the first string + add rax,25 ;third string, "rc4(16x,int)" + DB 0F3h,0C3h ;repret +$L$8xchar:: + add rax,12 ;second string, "rc4(8x,char)" +$L$done:: + DB 0F3h,0C3h ;repret +ALIGN 64 
+$L$opts:: +DB 114,99,52,40,56,120,44,105,110,116,41,0 ;"rc4(8x,int)", 12 bytes with the terminator +DB 114,99,52,40,56,120,44,99,104,97,114,41,0 ;"rc4(8x,char)", 13 bytes with the terminator +DB 114,99,52,40,49,54,120,44,105,110,116,41,0 ;"rc4(16x,int)" +DB 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32 ;"RC4 for x86_64, CRYPTOGAMS by <appro@openssl.org>" +DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +DB 62,0 +ALIGN 64 +RC4_options ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +stream_se_handler PROC PRIVATE ;exception handler named by the asm_RC4 unwind info; r8 presumably = CONTEXT record, r9 = dispatcher context + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] ;saved rax (Win64 CONTEXT layout assumed); asm_RC4 copies its entry rsp there + mov rbx,QWORD PTR[248+r8] ;saved rip + + lea r10,QWORD PTR[$L$prologue] + cmp rbx,r10 + jb $L$in_prologue ;fault before the pushes finished: nothing to restore + + mov rax,QWORD PTR[152+r8] ;saved rsp + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jae $L$in_prologue ;fault after the pops: registers already restored + + lea rax,QWORD PTR[24+rax] ;step over the three pushed registers + + mov rbx,QWORD PTR[((-8))+rax] + mov r12,QWORD PTR[((-16))+rax] + mov r13,QWORD PTR[((-24))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] ;rdi and rsi were spilled to these slots by the WIN64 prologue + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + jmp $L$common_seh_exit ;shared tail is inside key_se_handler +stream_se_handler ENDP + + +ALIGN 16 +key_se_handler PROC PRIVATE ;exception handler named by the asm_RC4_set_key unwind info + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[152+r8] + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + +$L$common_seh_exit:: + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch ;bytes FC F3 48 A5 = cld, rep movsq: copy 154 qwords from r8 to the record at 40 in r9 + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 ;stack arguments five to eight follow the 32 byte shadow area + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 ;presumably ExceptionContinueSearch, confirm against the SDK + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 
+ pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +key_se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_asm_RC4 + DD imagerel $L$SEH_end_asm_RC4 + DD imagerel $L$SEH_info_asm_RC4 + + DD imagerel $L$SEH_begin_asm_RC4_set_key + DD imagerel $L$SEH_end_asm_RC4_set_key + DD imagerel $L$SEH_info_asm_RC4_set_key + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_asm_RC4:: +DB 9,0,0,0 + DD imagerel stream_se_handler +$L$SEH_info_asm_RC4_set_key:: +DB 9,0,0,0 + DD imagerel key_se_handler + +.xdata ENDS +END diff --git a/win-x86_64/crypto/sha/sha1-x86_64.asm b/win-x86_64/crypto/sha/sha1-x86_64.asm new file mode 100644 index 0000000..ecda6dc --- /dev/null +++ b/win-x86_64/crypto/sha/sha1-x86_64.asm @@ -0,0 +1,2619 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC sha1_block_data_order + +ALIGN 16 +sha1_block_data_order PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + mov r9d,DWORD PTR[((OPENSSL_ia32cap_P+0))] + mov r8d,DWORD PTR[((OPENSSL_ia32cap_P+4))] + mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+8))] + test r8d,512 + jz $L$ialu + jmp _ssse3_shortcut + +ALIGN 16 +$L$ialu:: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + mov r8,rdi + sub rsp,72 + mov r9,rsi + and rsp,-64 + mov r10,rdx + mov QWORD PTR[64+rsp],rax +$L$prologue:: + + mov esi,DWORD PTR[r8] + mov edi,DWORD PTR[4+r8] + mov r11d,DWORD PTR[8+r8] + mov r12d,DWORD PTR[12+r8] + mov r13d,DWORD PTR[16+r8] + jmp $L$loop + +ALIGN 16 +$L$loop:: + mov edx,DWORD PTR[r9] + bswap edx + mov ebp,DWORD PTR[4+r9] + mov eax,r12d + mov DWORD PTR[rsp],edx + mov ecx,esi + bswap ebp + xor eax,r11d + rol ecx,5 + and eax,edi + lea r13d,DWORD PTR[1518500249+r13*1+rdx] + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov r14d,DWORD PTR[8+r9] + 
mov eax,r11d + mov DWORD PTR[4+rsp],ebp + mov ecx,r13d + bswap r14d + xor eax,edi + rol ecx,5 + and eax,esi + lea r12d,DWORD PTR[1518500249+r12*1+rbp] + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov edx,DWORD PTR[12+r9] + mov eax,edi + mov DWORD PTR[8+rsp],r14d + mov ecx,r12d + bswap edx + xor eax,esi + rol ecx,5 + and eax,r13d + lea r11d,DWORD PTR[1518500249+r11*1+r14] + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov ebp,DWORD PTR[16+r9] + mov eax,esi + mov DWORD PTR[12+rsp],edx + mov ecx,r11d + bswap ebp + xor eax,r13d + rol ecx,5 + and eax,r12d + lea edi,DWORD PTR[1518500249+rdi*1+rdx] + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov r14d,DWORD PTR[20+r9] + mov eax,r13d + mov DWORD PTR[16+rsp],ebp + mov ecx,edi + bswap r14d + xor eax,r12d + rol ecx,5 + and eax,r11d + lea esi,DWORD PTR[1518500249+rsi*1+rbp] + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + mov edx,DWORD PTR[24+r9] + mov eax,r12d + mov DWORD PTR[20+rsp],r14d + mov ecx,esi + bswap edx + xor eax,r11d + rol ecx,5 + and eax,edi + lea r13d,DWORD PTR[1518500249+r13*1+r14] + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov ebp,DWORD PTR[28+r9] + mov eax,r11d + mov DWORD PTR[24+rsp],edx + mov ecx,r13d + bswap ebp + xor eax,edi + rol ecx,5 + and eax,esi + lea r12d,DWORD PTR[1518500249+r12*1+rdx] + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov r14d,DWORD PTR[32+r9] + mov eax,edi + mov DWORD PTR[28+rsp],ebp + mov ecx,r12d + bswap r14d + xor eax,esi + rol ecx,5 + and eax,r13d + lea r11d,DWORD PTR[1518500249+r11*1+rbp] + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov edx,DWORD PTR[36+r9] + mov eax,esi + mov DWORD PTR[32+rsp],r14d + mov ecx,r11d + bswap edx + xor eax,r13d + rol ecx,5 + and eax,r12d + lea edi,DWORD PTR[1518500249+rdi*1+r14] + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov ebp,DWORD PTR[40+r9] + mov eax,r13d + mov DWORD PTR[36+rsp],edx + mov ecx,edi + bswap ebp + xor eax,r12d + rol ecx,5 
+ and eax,r11d + lea esi,DWORD PTR[1518500249+rsi*1+rdx] + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + mov r14d,DWORD PTR[44+r9] + mov eax,r12d + mov DWORD PTR[40+rsp],ebp + mov ecx,esi + bswap r14d + xor eax,r11d + rol ecx,5 + and eax,edi + lea r13d,DWORD PTR[1518500249+r13*1+rbp] + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov edx,DWORD PTR[48+r9] + mov eax,r11d + mov DWORD PTR[44+rsp],r14d + mov ecx,r13d + bswap edx + xor eax,edi + rol ecx,5 + and eax,esi + lea r12d,DWORD PTR[1518500249+r12*1+r14] + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov ebp,DWORD PTR[52+r9] + mov eax,edi + mov DWORD PTR[48+rsp],edx + mov ecx,r12d + bswap ebp + xor eax,esi + rol ecx,5 + and eax,r13d + lea r11d,DWORD PTR[1518500249+r11*1+rdx] + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov r14d,DWORD PTR[56+r9] + mov eax,esi + mov DWORD PTR[52+rsp],ebp + mov ecx,r11d + bswap r14d + xor eax,r13d + rol ecx,5 + and eax,r12d + lea edi,DWORD PTR[1518500249+rdi*1+rbp] + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov edx,DWORD PTR[60+r9] + mov eax,r13d + mov DWORD PTR[56+rsp],r14d + mov ecx,edi + bswap edx + xor eax,r12d + rol ecx,5 + and eax,r11d + lea esi,DWORD PTR[1518500249+rsi*1+r14] + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + xor ebp,DWORD PTR[rsp] + mov eax,r12d + mov DWORD PTR[60+rsp],edx + mov ecx,esi + xor ebp,DWORD PTR[8+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD PTR[32+rsp] + and eax,edi + lea r13d,DWORD PTR[1518500249+r13*1+rdx] + rol edi,30 + xor eax,r12d + add r13d,ecx + rol ebp,1 + add r13d,eax + xor r14d,DWORD PTR[4+rsp] + mov eax,r11d + mov DWORD PTR[rsp],ebp + mov ecx,r13d + xor r14d,DWORD PTR[12+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD PTR[36+rsp] + and eax,esi + lea r12d,DWORD PTR[1518500249+r12*1+rbp] + rol esi,30 + xor eax,r11d + add r12d,ecx + rol r14d,1 + add r12d,eax + xor edx,DWORD PTR[8+rsp] + mov eax,edi + mov DWORD PTR[4+rsp],r14d + mov ecx,r12d + xor edx,DWORD 
PTR[16+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD PTR[40+rsp] + and eax,r13d + lea r11d,DWORD PTR[1518500249+r11*1+r14] + rol r13d,30 + xor eax,edi + add r11d,ecx + rol edx,1 + add r11d,eax + xor ebp,DWORD PTR[12+rsp] + mov eax,esi + mov DWORD PTR[8+rsp],edx + mov ecx,r11d + xor ebp,DWORD PTR[20+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD PTR[44+rsp] + and eax,r12d + lea edi,DWORD PTR[1518500249+rdi*1+rdx] + rol r12d,30 + xor eax,esi + add edi,ecx + rol ebp,1 + add edi,eax + xor r14d,DWORD PTR[16+rsp] + mov eax,r13d + mov DWORD PTR[12+rsp],ebp + mov ecx,edi + xor r14d,DWORD PTR[24+rsp] + xor eax,r12d + rol ecx,5 + xor r14d,DWORD PTR[48+rsp] + and eax,r11d + lea esi,DWORD PTR[1518500249+rsi*1+rbp] + rol r11d,30 + xor eax,r13d + add esi,ecx + rol r14d,1 + add esi,eax + xor edx,DWORD PTR[20+rsp] + mov eax,edi + mov DWORD PTR[16+rsp],r14d + mov ecx,esi + xor edx,DWORD PTR[28+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD PTR[52+rsp] + lea r13d,DWORD PTR[1859775393+r13*1+r14] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol edx,1 + xor ebp,DWORD PTR[24+rsp] + mov eax,esi + mov DWORD PTR[20+rsp],edx + mov ecx,r13d + xor ebp,DWORD PTR[32+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD PTR[56+rsp] + lea r12d,DWORD PTR[1859775393+r12*1+rdx] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol ebp,1 + xor r14d,DWORD PTR[28+rsp] + mov eax,r13d + mov DWORD PTR[24+rsp],ebp + mov ecx,r12d + xor r14d,DWORD PTR[36+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD PTR[60+rsp] + lea r11d,DWORD PTR[1859775393+r11*1+rbp] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol r14d,1 + xor edx,DWORD PTR[32+rsp] + mov eax,r12d + mov DWORD PTR[28+rsp],r14d + mov ecx,r11d + xor edx,DWORD PTR[40+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD PTR[rsp] + lea edi,DWORD PTR[1859775393+rdi*1+r14] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol edx,1 + xor ebp,DWORD PTR[36+rsp] + mov eax,r11d + mov DWORD PTR[32+rsp],edx + mov ecx,edi + xor 
ebp,DWORD PTR[44+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD PTR[4+rsp] + lea esi,DWORD PTR[1859775393+rsi*1+rdx] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol ebp,1 + xor r14d,DWORD PTR[40+rsp] + mov eax,edi + mov DWORD PTR[36+rsp],ebp + mov ecx,esi + xor r14d,DWORD PTR[48+rsp] + xor eax,r12d + rol ecx,5 + xor r14d,DWORD PTR[8+rsp] + lea r13d,DWORD PTR[1859775393+r13*1+rbp] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol r14d,1 + xor edx,DWORD PTR[44+rsp] + mov eax,esi + mov DWORD PTR[40+rsp],r14d + mov ecx,r13d + xor edx,DWORD PTR[52+rsp] + xor eax,r11d + rol ecx,5 + xor edx,DWORD PTR[12+rsp] + lea r12d,DWORD PTR[1859775393+r12*1+r14] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol edx,1 + xor ebp,DWORD PTR[48+rsp] + mov eax,r13d + mov DWORD PTR[44+rsp],edx + mov ecx,r12d + xor ebp,DWORD PTR[56+rsp] + xor eax,edi + rol ecx,5 + xor ebp,DWORD PTR[16+rsp] + lea r11d,DWORD PTR[1859775393+r11*1+rdx] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol ebp,1 + xor r14d,DWORD PTR[52+rsp] + mov eax,r12d + mov DWORD PTR[48+rsp],ebp + mov ecx,r11d + xor r14d,DWORD PTR[60+rsp] + xor eax,esi + rol ecx,5 + xor r14d,DWORD PTR[20+rsp] + lea edi,DWORD PTR[1859775393+rdi*1+rbp] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol r14d,1 + xor edx,DWORD PTR[56+rsp] + mov eax,r11d + mov DWORD PTR[52+rsp],r14d + mov ecx,edi + xor edx,DWORD PTR[rsp] + xor eax,r13d + rol ecx,5 + xor edx,DWORD PTR[24+rsp] + lea esi,DWORD PTR[1859775393+rsi*1+r14] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol edx,1 + xor ebp,DWORD PTR[60+rsp] + mov eax,edi + mov DWORD PTR[56+rsp],edx + mov ecx,esi + xor ebp,DWORD PTR[4+rsp] + xor eax,r12d + rol ecx,5 + xor ebp,DWORD PTR[28+rsp] + lea r13d,DWORD PTR[1859775393+r13*1+rdx] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol ebp,1 + xor r14d,DWORD PTR[rsp] + mov eax,esi + mov DWORD PTR[60+rsp],ebp + mov ecx,r13d + xor r14d,DWORD PTR[8+rsp] + xor eax,r11d + 
rol ecx,5 + xor r14d,DWORD PTR[32+rsp] + lea r12d,DWORD PTR[1859775393+r12*1+rbp] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol r14d,1 + xor edx,DWORD PTR[4+rsp] + mov eax,r13d + mov DWORD PTR[rsp],r14d + mov ecx,r12d + xor edx,DWORD PTR[12+rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD PTR[36+rsp] + lea r11d,DWORD PTR[1859775393+r11*1+r14] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol edx,1 + xor ebp,DWORD PTR[8+rsp] + mov eax,r12d + mov DWORD PTR[4+rsp],edx + mov ecx,r11d + xor ebp,DWORD PTR[16+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD PTR[40+rsp] + lea edi,DWORD PTR[1859775393+rdi*1+rdx] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol ebp,1 + xor r14d,DWORD PTR[12+rsp] + mov eax,r11d + mov DWORD PTR[8+rsp],ebp + mov ecx,edi + xor r14d,DWORD PTR[20+rsp] + xor eax,r13d + rol ecx,5 + xor r14d,DWORD PTR[44+rsp] + lea esi,DWORD PTR[1859775393+rsi*1+rbp] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol r14d,1 + xor edx,DWORD PTR[16+rsp] + mov eax,edi + mov DWORD PTR[12+rsp],r14d + mov ecx,esi + xor edx,DWORD PTR[24+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD PTR[48+rsp] + lea r13d,DWORD PTR[1859775393+r13*1+r14] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol edx,1 + xor ebp,DWORD PTR[20+rsp] + mov eax,esi + mov DWORD PTR[16+rsp],edx + mov ecx,r13d + xor ebp,DWORD PTR[28+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD PTR[52+rsp] + lea r12d,DWORD PTR[1859775393+r12*1+rdx] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol ebp,1 + xor r14d,DWORD PTR[24+rsp] + mov eax,r13d + mov DWORD PTR[20+rsp],ebp + mov ecx,r12d + xor r14d,DWORD PTR[32+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD PTR[56+rsp] + lea r11d,DWORD PTR[1859775393+r11*1+rbp] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol r14d,1 + xor edx,DWORD PTR[28+rsp] + mov eax,r12d + mov DWORD PTR[24+rsp],r14d + mov ecx,r11d + xor edx,DWORD PTR[36+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD 
PTR[60+rsp] + lea edi,DWORD PTR[1859775393+rdi*1+r14] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol edx,1 + xor ebp,DWORD PTR[32+rsp] + mov eax,r11d + mov DWORD PTR[28+rsp],edx + mov ecx,edi + xor ebp,DWORD PTR[40+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD PTR[rsp] + lea esi,DWORD PTR[1859775393+rsi*1+rdx] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol ebp,1 + xor r14d,DWORD PTR[36+rsp] + mov eax,r12d + mov DWORD PTR[32+rsp],ebp + mov ebx,r12d + xor r14d,DWORD PTR[44+rsp] + and eax,r11d + mov ecx,esi + xor r14d,DWORD PTR[4+rsp] + lea r13d,DWORD PTR[((-1894007588))+r13*1+rbp] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol r14d,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor edx,DWORD PTR[40+rsp] + mov eax,r11d + mov DWORD PTR[36+rsp],r14d + mov ebx,r11d + xor edx,DWORD PTR[48+rsp] + and eax,edi + mov ecx,r13d + xor edx,DWORD PTR[8+rsp] + lea r12d,DWORD PTR[((-1894007588))+r12*1+r14] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol edx,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor ebp,DWORD PTR[44+rsp] + mov eax,edi + mov DWORD PTR[40+rsp],edx + mov ebx,edi + xor ebp,DWORD PTR[52+rsp] + and eax,esi + mov ecx,r12d + xor ebp,DWORD PTR[12+rsp] + lea r11d,DWORD PTR[((-1894007588))+r11*1+rdx] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol ebp,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor r14d,DWORD PTR[48+rsp] + mov eax,esi + mov DWORD PTR[44+rsp],ebp + mov ebx,esi + xor r14d,DWORD PTR[56+rsp] + and eax,r13d + mov ecx,r11d + xor r14d,DWORD PTR[16+rsp] + lea edi,DWORD PTR[((-1894007588))+rdi*1+rbp] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol r14d,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor edx,DWORD PTR[52+rsp] + mov eax,r13d + mov DWORD PTR[48+rsp],r14d + mov ebx,r13d + xor edx,DWORD PTR[60+rsp] + and eax,r12d + mov ecx,edi + xor edx,DWORD PTR[20+rsp] + lea esi,DWORD PTR[((-1894007588))+rsi*1+r14] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol edx,1 + and 
ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor ebp,DWORD PTR[56+rsp] + mov eax,r12d + mov DWORD PTR[52+rsp],edx + mov ebx,r12d + xor ebp,DWORD PTR[rsp] + and eax,r11d + mov ecx,esi + xor ebp,DWORD PTR[24+rsp] + lea r13d,DWORD PTR[((-1894007588))+r13*1+rdx] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol ebp,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor r14d,DWORD PTR[60+rsp] + mov eax,r11d + mov DWORD PTR[56+rsp],ebp + mov ebx,r11d + xor r14d,DWORD PTR[4+rsp] + and eax,edi + mov ecx,r13d + xor r14d,DWORD PTR[28+rsp] + lea r12d,DWORD PTR[((-1894007588))+r12*1+rbp] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol r14d,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor edx,DWORD PTR[rsp] + mov eax,edi + mov DWORD PTR[60+rsp],r14d + mov ebx,edi + xor edx,DWORD PTR[8+rsp] + and eax,esi + mov ecx,r12d + xor edx,DWORD PTR[32+rsp] + lea r11d,DWORD PTR[((-1894007588))+r11*1+r14] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol edx,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor ebp,DWORD PTR[4+rsp] + mov eax,esi + mov DWORD PTR[rsp],edx + mov ebx,esi + xor ebp,DWORD PTR[12+rsp] + and eax,r13d + mov ecx,r11d + xor ebp,DWORD PTR[36+rsp] + lea edi,DWORD PTR[((-1894007588))+rdi*1+rdx] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol ebp,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor r14d,DWORD PTR[8+rsp] + mov eax,r13d + mov DWORD PTR[4+rsp],ebp + mov ebx,r13d + xor r14d,DWORD PTR[16+rsp] + and eax,r12d + mov ecx,edi + xor r14d,DWORD PTR[40+rsp] + lea esi,DWORD PTR[((-1894007588))+rsi*1+rbp] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol r14d,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor edx,DWORD PTR[12+rsp] + mov eax,r12d + mov DWORD PTR[8+rsp],r14d + mov ebx,r12d + xor edx,DWORD PTR[20+rsp] + and eax,r11d + mov ecx,esi + xor edx,DWORD PTR[44+rsp] + lea r13d,DWORD PTR[((-1894007588))+r13*1+r14] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol edx,1 + and ebx,edi + add r13d,ecx + rol 
edi,30 + add r13d,ebx + xor ebp,DWORD PTR[16+rsp] + mov eax,r11d + mov DWORD PTR[12+rsp],edx + mov ebx,r11d + xor ebp,DWORD PTR[24+rsp] + and eax,edi + mov ecx,r13d + xor ebp,DWORD PTR[48+rsp] + lea r12d,DWORD PTR[((-1894007588))+r12*1+rdx] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol ebp,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor r14d,DWORD PTR[20+rsp] + mov eax,edi + mov DWORD PTR[16+rsp],ebp + mov ebx,edi + xor r14d,DWORD PTR[28+rsp] + and eax,esi + mov ecx,r12d + xor r14d,DWORD PTR[52+rsp] + lea r11d,DWORD PTR[((-1894007588))+r11*1+rbp] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol r14d,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor edx,DWORD PTR[24+rsp] + mov eax,esi + mov DWORD PTR[20+rsp],r14d + mov ebx,esi + xor edx,DWORD PTR[32+rsp] + and eax,r13d + mov ecx,r11d + xor edx,DWORD PTR[56+rsp] + lea edi,DWORD PTR[((-1894007588))+rdi*1+r14] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol edx,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor ebp,DWORD PTR[28+rsp] + mov eax,r13d + mov DWORD PTR[24+rsp],edx + mov ebx,r13d + xor ebp,DWORD PTR[36+rsp] + and eax,r12d + mov ecx,edi + xor ebp,DWORD PTR[60+rsp] + lea esi,DWORD PTR[((-1894007588))+rsi*1+rdx] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol ebp,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor r14d,DWORD PTR[32+rsp] + mov eax,r12d + mov DWORD PTR[28+rsp],ebp + mov ebx,r12d + xor r14d,DWORD PTR[40+rsp] + and eax,r11d + mov ecx,esi + xor r14d,DWORD PTR[rsp] + lea r13d,DWORD PTR[((-1894007588))+r13*1+rbp] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol r14d,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor edx,DWORD PTR[36+rsp] + mov eax,r11d + mov DWORD PTR[32+rsp],r14d + mov ebx,r11d + xor edx,DWORD PTR[44+rsp] + and eax,edi + mov ecx,r13d + xor edx,DWORD PTR[4+rsp] + lea r12d,DWORD PTR[((-1894007588))+r12*1+r14] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol edx,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx 
+ xor ebp,DWORD PTR[40+rsp] + mov eax,edi + mov DWORD PTR[36+rsp],edx + mov ebx,edi + xor ebp,DWORD PTR[48+rsp] + and eax,esi + mov ecx,r12d + xor ebp,DWORD PTR[8+rsp] + lea r11d,DWORD PTR[((-1894007588))+r11*1+rdx] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol ebp,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor r14d,DWORD PTR[44+rsp] + mov eax,esi + mov DWORD PTR[40+rsp],ebp + mov ebx,esi + xor r14d,DWORD PTR[52+rsp] + and eax,r13d + mov ecx,r11d + xor r14d,DWORD PTR[12+rsp] + lea edi,DWORD PTR[((-1894007588))+rdi*1+rbp] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol r14d,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor edx,DWORD PTR[48+rsp] + mov eax,r13d + mov DWORD PTR[44+rsp],r14d + mov ebx,r13d + xor edx,DWORD PTR[56+rsp] + and eax,r12d + mov ecx,edi + xor edx,DWORD PTR[16+rsp] + lea esi,DWORD PTR[((-1894007588))+rsi*1+r14] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol edx,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor ebp,DWORD PTR[52+rsp] + mov eax,edi + mov DWORD PTR[48+rsp],edx + mov ecx,esi + xor ebp,DWORD PTR[60+rsp] + xor eax,r12d + rol ecx,5 + xor ebp,DWORD PTR[20+rsp] + lea r13d,DWORD PTR[((-899497514))+r13*1+rdx] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol ebp,1 + xor r14d,DWORD PTR[56+rsp] + mov eax,esi + mov DWORD PTR[52+rsp],ebp + mov ecx,r13d + xor r14d,DWORD PTR[rsp] + xor eax,r11d + rol ecx,5 + xor r14d,DWORD PTR[24+rsp] + lea r12d,DWORD PTR[((-899497514))+r12*1+rbp] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol r14d,1 + xor edx,DWORD PTR[60+rsp] + mov eax,r13d + mov DWORD PTR[56+rsp],r14d + mov ecx,r12d + xor edx,DWORD PTR[4+rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD PTR[28+rsp] + lea r11d,DWORD PTR[((-899497514))+r11*1+r14] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol edx,1 + xor ebp,DWORD PTR[rsp] + mov eax,r12d + mov DWORD PTR[60+rsp],edx + mov ecx,r11d + xor ebp,DWORD PTR[8+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD 
PTR[32+rsp] + lea edi,DWORD PTR[((-899497514))+rdi*1+rdx] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol ebp,1 + xor r14d,DWORD PTR[4+rsp] + mov eax,r11d + mov DWORD PTR[rsp],ebp + mov ecx,edi + xor r14d,DWORD PTR[12+rsp] + xor eax,r13d + rol ecx,5 + xor r14d,DWORD PTR[36+rsp] + lea esi,DWORD PTR[((-899497514))+rsi*1+rbp] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol r14d,1 + xor edx,DWORD PTR[8+rsp] + mov eax,edi + mov DWORD PTR[4+rsp],r14d + mov ecx,esi + xor edx,DWORD PTR[16+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD PTR[40+rsp] + lea r13d,DWORD PTR[((-899497514))+r13*1+r14] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol edx,1 + xor ebp,DWORD PTR[12+rsp] + mov eax,esi + mov DWORD PTR[8+rsp],edx + mov ecx,r13d + xor ebp,DWORD PTR[20+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD PTR[44+rsp] + lea r12d,DWORD PTR[((-899497514))+r12*1+rdx] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol ebp,1 + xor r14d,DWORD PTR[16+rsp] + mov eax,r13d + mov DWORD PTR[12+rsp],ebp + mov ecx,r12d + xor r14d,DWORD PTR[24+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD PTR[48+rsp] + lea r11d,DWORD PTR[((-899497514))+r11*1+rbp] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol r14d,1 + xor edx,DWORD PTR[20+rsp] + mov eax,r12d + mov DWORD PTR[16+rsp],r14d + mov ecx,r11d + xor edx,DWORD PTR[28+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD PTR[52+rsp] + lea edi,DWORD PTR[((-899497514))+rdi*1+r14] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol edx,1 + xor ebp,DWORD PTR[24+rsp] + mov eax,r11d + mov DWORD PTR[20+rsp],edx + mov ecx,edi + xor ebp,DWORD PTR[32+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD PTR[56+rsp] + lea esi,DWORD PTR[((-899497514))+rsi*1+rdx] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol ebp,1 + xor r14d,DWORD PTR[28+rsp] + mov eax,edi + mov DWORD PTR[24+rsp],ebp + mov ecx,esi + xor r14d,DWORD PTR[36+rsp] + xor eax,r12d + rol ecx,5 + xor r14d,DWORD 
PTR[60+rsp] + lea r13d,DWORD PTR[((-899497514))+r13*1+rbp] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol r14d,1 + xor edx,DWORD PTR[32+rsp] + mov eax,esi + mov DWORD PTR[28+rsp],r14d + mov ecx,r13d + xor edx,DWORD PTR[40+rsp] + xor eax,r11d + rol ecx,5 + xor edx,DWORD PTR[rsp] + lea r12d,DWORD PTR[((-899497514))+r12*1+r14] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol edx,1 + xor ebp,DWORD PTR[36+rsp] + mov eax,r13d + + mov ecx,r12d + xor ebp,DWORD PTR[44+rsp] + xor eax,edi + rol ecx,5 + xor ebp,DWORD PTR[4+rsp] + lea r11d,DWORD PTR[((-899497514))+r11*1+rdx] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol ebp,1 + xor r14d,DWORD PTR[40+rsp] + mov eax,r12d + + mov ecx,r11d + xor r14d,DWORD PTR[48+rsp] + xor eax,esi + rol ecx,5 + xor r14d,DWORD PTR[8+rsp] + lea edi,DWORD PTR[((-899497514))+rdi*1+rbp] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol r14d,1 + xor edx,DWORD PTR[44+rsp] + mov eax,r11d + + mov ecx,edi + xor edx,DWORD PTR[52+rsp] + xor eax,r13d + rol ecx,5 + xor edx,DWORD PTR[12+rsp] + lea esi,DWORD PTR[((-899497514))+rsi*1+r14] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol edx,1 + xor ebp,DWORD PTR[48+rsp] + mov eax,edi + + mov ecx,esi + xor ebp,DWORD PTR[56+rsp] + xor eax,r12d + rol ecx,5 + xor ebp,DWORD PTR[16+rsp] + lea r13d,DWORD PTR[((-899497514))+r13*1+rdx] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol ebp,1 + xor r14d,DWORD PTR[52+rsp] + mov eax,esi + + mov ecx,r13d + xor r14d,DWORD PTR[60+rsp] + xor eax,r11d + rol ecx,5 + xor r14d,DWORD PTR[20+rsp] + lea r12d,DWORD PTR[((-899497514))+r12*1+rbp] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol r14d,1 + xor edx,DWORD PTR[56+rsp] + mov eax,r13d + + mov ecx,r12d + xor edx,DWORD PTR[rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD PTR[24+rsp] + lea r11d,DWORD PTR[((-899497514))+r11*1+r14] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol edx,1 + xor ebp,DWORD PTR[60+rsp] + mov 
eax,r12d + + mov ecx,r11d + xor ebp,DWORD PTR[4+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD PTR[28+rsp] + lea edi,DWORD PTR[((-899497514))+rdi*1+rdx] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol ebp,1 + mov eax,r11d + mov ecx,edi + xor eax,r13d + lea esi,DWORD PTR[((-899497514))+rsi*1+rbp] + rol ecx,5 + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + add esi,DWORD PTR[r8] + add edi,DWORD PTR[4+r8] + add r11d,DWORD PTR[8+r8] + add r12d,DWORD PTR[12+r8] + add r13d,DWORD PTR[16+r8] + mov DWORD PTR[r8],esi + mov DWORD PTR[4+r8],edi + mov DWORD PTR[8+r8],r11d + mov DWORD PTR[12+r8],r12d + mov DWORD PTR[16+r8],r13d + + sub r10,1 + lea r9,QWORD PTR[64+r9] + jnz $L$loop + + mov rsi,QWORD PTR[64+rsp] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_block_data_order:: +sha1_block_data_order ENDP + +ALIGN 16 +sha1_block_data_order_ssse3 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_ssse3:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_ssse3_shortcut:: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + lea rsp,QWORD PTR[((-160))+rsp] + movaps XMMWORD PTR[(-40-96)+rax],xmm6 + movaps XMMWORD PTR[(-40-80)+rax],xmm7 + movaps XMMWORD PTR[(-40-64)+rax],xmm8 + movaps XMMWORD PTR[(-40-48)+rax],xmm9 + movaps XMMWORD PTR[(-40-32)+rax],xmm10 + movaps XMMWORD PTR[(-40-16)+rax],xmm11 +$L$prologue_ssse3:: + mov r14,rax + and rsp,-64 + mov r8,rdi + mov r9,rsi + mov r10,rdx + + shl r10,6 + add r10,r9 + lea r11,QWORD PTR[((K_XX_XX+64))] + + mov eax,DWORD PTR[r8] + mov ebx,DWORD PTR[4+r8] + mov ecx,DWORD PTR[8+r8] + mov edx,DWORD PTR[12+r8] + mov esi,ebx + mov ebp,DWORD PTR[16+r8] + mov edi,ecx + 
xor edi,edx + and esi,edi + + movdqa xmm6,XMMWORD PTR[64+r11] + movdqa xmm9,XMMWORD PTR[((-64))+r11] + movdqu xmm0,XMMWORD PTR[r9] + movdqu xmm1,XMMWORD PTR[16+r9] + movdqu xmm2,XMMWORD PTR[32+r9] + movdqu xmm3,XMMWORD PTR[48+r9] +DB 102,15,56,0,198 +DB 102,15,56,0,206 +DB 102,15,56,0,214 + add r9,64 + paddd xmm0,xmm9 +DB 102,15,56,0,222 + paddd xmm1,xmm9 + paddd xmm2,xmm9 + movdqa XMMWORD PTR[rsp],xmm0 + psubd xmm0,xmm9 + movdqa XMMWORD PTR[16+rsp],xmm1 + psubd xmm1,xmm9 + movdqa XMMWORD PTR[32+rsp],xmm2 + psubd xmm2,xmm9 + jmp $L$oop_ssse3 +ALIGN 16 +$L$oop_ssse3:: + ror ebx,2 + pshufd xmm4,xmm0,238 + xor esi,edx + movdqa xmm8,xmm3 + paddd xmm9,xmm3 + mov edi,eax + add ebp,DWORD PTR[rsp] + punpcklqdq xmm4,xmm1 + xor ebx,ecx + rol eax,5 + add ebp,esi + psrldq xmm8,4 + and edi,ebx + xor ebx,ecx + pxor xmm4,xmm0 + add ebp,eax + ror eax,7 + pxor xmm8,xmm2 + xor edi,ecx + mov esi,ebp + add edx,DWORD PTR[4+rsp] + pxor xmm4,xmm8 + xor eax,ebx + rol ebp,5 + movdqa XMMWORD PTR[48+rsp],xmm9 + add edx,edi + and esi,eax + movdqa xmm10,xmm4 + xor eax,ebx + add edx,ebp + ror ebp,7 + movdqa xmm8,xmm4 + xor esi,ebx + pslldq xmm10,12 + paddd xmm4,xmm4 + mov edi,edx + add ecx,DWORD PTR[8+rsp] + psrld xmm8,31 + xor ebp,eax + rol edx,5 + add ecx,esi + movdqa xmm9,xmm10 + and edi,ebp + xor ebp,eax + psrld xmm10,30 + add ecx,edx + ror edx,7 + por xmm4,xmm8 + xor edi,eax + mov esi,ecx + add ebx,DWORD PTR[12+rsp] + pslld xmm9,2 + pxor xmm4,xmm10 + xor edx,ebp + movdqa xmm10,XMMWORD PTR[((-64))+r11] + rol ecx,5 + add ebx,edi + and esi,edx + pxor xmm4,xmm9 + xor edx,ebp + add ebx,ecx + ror ecx,7 + pshufd xmm5,xmm1,238 + xor esi,ebp + movdqa xmm9,xmm4 + paddd xmm10,xmm4 + mov edi,ebx + add eax,DWORD PTR[16+rsp] + punpcklqdq xmm5,xmm2 + xor ecx,edx + rol ebx,5 + add eax,esi + psrldq xmm9,4 + and edi,ecx + xor ecx,edx + pxor xmm5,xmm1 + add eax,ebx + ror ebx,7 + pxor xmm9,xmm3 + xor edi,edx + mov esi,eax + add ebp,DWORD PTR[20+rsp] + pxor xmm5,xmm9 + xor ebx,ecx + rol eax,5 + movdqa XMMWORD 
PTR[rsp],xmm10 + add ebp,edi + and esi,ebx + movdqa xmm8,xmm5 + xor ebx,ecx + add ebp,eax + ror eax,7 + movdqa xmm9,xmm5 + xor esi,ecx + pslldq xmm8,12 + paddd xmm5,xmm5 + mov edi,ebp + add edx,DWORD PTR[24+rsp] + psrld xmm9,31 + xor eax,ebx + rol ebp,5 + add edx,esi + movdqa xmm10,xmm8 + and edi,eax + xor eax,ebx + psrld xmm8,30 + add edx,ebp + ror ebp,7 + por xmm5,xmm9 + xor edi,ebx + mov esi,edx + add ecx,DWORD PTR[28+rsp] + pslld xmm10,2 + pxor xmm5,xmm8 + xor ebp,eax + movdqa xmm8,XMMWORD PTR[((-32))+r11] + rol edx,5 + add ecx,edi + and esi,ebp + pxor xmm5,xmm10 + xor ebp,eax + add ecx,edx + ror edx,7 + pshufd xmm6,xmm2,238 + xor esi,eax + movdqa xmm10,xmm5 + paddd xmm8,xmm5 + mov edi,ecx + add ebx,DWORD PTR[32+rsp] + punpcklqdq xmm6,xmm3 + xor edx,ebp + rol ecx,5 + add ebx,esi + psrldq xmm10,4 + and edi,edx + xor edx,ebp + pxor xmm6,xmm2 + add ebx,ecx + ror ecx,7 + pxor xmm10,xmm4 + xor edi,ebp + mov esi,ebx + add eax,DWORD PTR[36+rsp] + pxor xmm6,xmm10 + xor ecx,edx + rol ebx,5 + movdqa XMMWORD PTR[16+rsp],xmm8 + add eax,edi + and esi,ecx + movdqa xmm9,xmm6 + xor ecx,edx + add eax,ebx + ror ebx,7 + movdqa xmm10,xmm6 + xor esi,edx + pslldq xmm9,12 + paddd xmm6,xmm6 + mov edi,eax + add ebp,DWORD PTR[40+rsp] + psrld xmm10,31 + xor ebx,ecx + rol eax,5 + add ebp,esi + movdqa xmm8,xmm9 + and edi,ebx + xor ebx,ecx + psrld xmm9,30 + add ebp,eax + ror eax,7 + por xmm6,xmm10 + xor edi,ecx + mov esi,ebp + add edx,DWORD PTR[44+rsp] + pslld xmm8,2 + pxor xmm6,xmm9 + xor eax,ebx + movdqa xmm9,XMMWORD PTR[((-32))+r11] + rol ebp,5 + add edx,edi + and esi,eax + pxor xmm6,xmm8 + xor eax,ebx + add edx,ebp + ror ebp,7 + pshufd xmm7,xmm3,238 + xor esi,ebx + movdqa xmm8,xmm6 + paddd xmm9,xmm6 + mov edi,edx + add ecx,DWORD PTR[48+rsp] + punpcklqdq xmm7,xmm4 + xor ebp,eax + rol edx,5 + add ecx,esi + psrldq xmm8,4 + and edi,ebp + xor ebp,eax + pxor xmm7,xmm3 + add ecx,edx + ror edx,7 + pxor xmm8,xmm5 + xor edi,eax + mov esi,ecx + add ebx,DWORD PTR[52+rsp] + pxor xmm7,xmm8 + xor 
edx,ebp + rol ecx,5 + movdqa XMMWORD PTR[32+rsp],xmm9 + add ebx,edi + and esi,edx + movdqa xmm10,xmm7 + xor edx,ebp + add ebx,ecx + ror ecx,7 + movdqa xmm8,xmm7 + xor esi,ebp + pslldq xmm10,12 + paddd xmm7,xmm7 + mov edi,ebx + add eax,DWORD PTR[56+rsp] + psrld xmm8,31 + xor ecx,edx + rol ebx,5 + add eax,esi + movdqa xmm9,xmm10 + and edi,ecx + xor ecx,edx + psrld xmm10,30 + add eax,ebx + ror ebx,7 + por xmm7,xmm8 + xor edi,edx + mov esi,eax + add ebp,DWORD PTR[60+rsp] + pslld xmm9,2 + pxor xmm7,xmm10 + xor ebx,ecx + movdqa xmm10,XMMWORD PTR[((-32))+r11] + rol eax,5 + add ebp,edi + and esi,ebx + pxor xmm7,xmm9 + pshufd xmm9,xmm6,238 + xor ebx,ecx + add ebp,eax + ror eax,7 + pxor xmm0,xmm4 + xor esi,ecx + mov edi,ebp + add edx,DWORD PTR[rsp] + punpcklqdq xmm9,xmm7 + xor eax,ebx + rol ebp,5 + pxor xmm0,xmm1 + add edx,esi + and edi,eax + movdqa xmm8,xmm10 + xor eax,ebx + paddd xmm10,xmm7 + add edx,ebp + pxor xmm0,xmm9 + ror ebp,7 + xor edi,ebx + mov esi,edx + add ecx,DWORD PTR[4+rsp] + movdqa xmm9,xmm0 + xor ebp,eax + rol edx,5 + movdqa XMMWORD PTR[48+rsp],xmm10 + add ecx,edi + and esi,ebp + xor ebp,eax + pslld xmm0,2 + add ecx,edx + ror edx,7 + psrld xmm9,30 + xor esi,eax + mov edi,ecx + add ebx,DWORD PTR[8+rsp] + por xmm0,xmm9 + xor edx,ebp + rol ecx,5 + pshufd xmm10,xmm7,238 + add ebx,esi + and edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD PTR[12+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + add eax,ebx + pxor xmm1,xmm5 + add ebp,DWORD PTR[16+rsp] + xor esi,ecx + punpcklqdq xmm10,xmm0 + mov edi,eax + rol eax,5 + pxor xmm1,xmm2 + add ebp,esi + xor edi,ecx + movdqa xmm9,xmm8 + ror ebx,7 + paddd xmm8,xmm0 + add ebp,eax + pxor xmm1,xmm10 + add edx,DWORD PTR[20+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + movdqa xmm10,xmm1 + add edx,edi + xor esi,ebx + movdqa XMMWORD PTR[rsp],xmm8 + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[24+rsp] + pslld xmm1,2 + xor esi,eax + mov edi,edx + psrld xmm10,30 + rol edx,5 + add ecx,esi + xor 
edi,eax + ror ebp,7 + por xmm1,xmm10 + add ecx,edx + add ebx,DWORD PTR[28+rsp] + pshufd xmm8,xmm0,238 + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + pxor xmm2,xmm6 + add eax,DWORD PTR[32+rsp] + xor esi,edx + punpcklqdq xmm8,xmm1 + mov edi,ebx + rol ebx,5 + pxor xmm2,xmm3 + add eax,esi + xor edi,edx + movdqa xmm10,XMMWORD PTR[r11] + ror ecx,7 + paddd xmm9,xmm1 + add eax,ebx + pxor xmm2,xmm8 + add ebp,DWORD PTR[36+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + movdqa xmm8,xmm2 + add ebp,edi + xor esi,ecx + movdqa XMMWORD PTR[16+rsp],xmm9 + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[40+rsp] + pslld xmm2,2 + xor esi,ebx + mov edi,ebp + psrld xmm8,30 + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + por xmm2,xmm8 + add edx,ebp + add ecx,DWORD PTR[44+rsp] + pshufd xmm9,xmm1,238 + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx + pxor xmm3,xmm7 + add ebx,DWORD PTR[48+rsp] + xor esi,ebp + punpcklqdq xmm9,xmm2 + mov edi,ecx + rol ecx,5 + pxor xmm3,xmm4 + add ebx,esi + xor edi,ebp + movdqa xmm8,xmm10 + ror edx,7 + paddd xmm10,xmm2 + add ebx,ecx + pxor xmm3,xmm9 + add eax,DWORD PTR[52+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + movdqa xmm9,xmm3 + add eax,edi + xor esi,edx + movdqa XMMWORD PTR[32+rsp],xmm10 + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[56+rsp] + pslld xmm3,2 + xor esi,ecx + mov edi,eax + psrld xmm9,30 + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + por xmm3,xmm9 + add ebp,eax + add edx,DWORD PTR[60+rsp] + pshufd xmm10,xmm2,238 + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + pxor xmm4,xmm0 + add ecx,DWORD PTR[rsp] + xor esi,eax + punpcklqdq xmm10,xmm3 + mov edi,edx + rol edx,5 + pxor xmm4,xmm5 + add ecx,esi + xor edi,eax + movdqa xmm9,xmm8 + ror ebp,7 + paddd xmm8,xmm3 + add ecx,edx + pxor xmm4,xmm10 + add ebx,DWORD PTR[4+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + movdqa xmm10,xmm4 + add ebx,edi + xor 
esi,ebp + movdqa XMMWORD PTR[48+rsp],xmm8 + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[8+rsp] + pslld xmm4,2 + xor esi,edx + mov edi,ebx + psrld xmm10,30 + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + por xmm4,xmm10 + add eax,ebx + add ebp,DWORD PTR[12+rsp] + pshufd xmm8,xmm3,238 + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + pxor xmm5,xmm1 + add edx,DWORD PTR[16+rsp] + xor esi,ebx + punpcklqdq xmm8,xmm4 + mov edi,ebp + rol ebp,5 + pxor xmm5,xmm6 + add edx,esi + xor edi,ebx + movdqa xmm10,xmm9 + ror eax,7 + paddd xmm9,xmm4 + add edx,ebp + pxor xmm5,xmm8 + add ecx,DWORD PTR[20+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + movdqa xmm8,xmm5 + add ecx,edi + xor esi,eax + movdqa XMMWORD PTR[rsp],xmm9 + ror ebp,7 + add ecx,edx + add ebx,DWORD PTR[24+rsp] + pslld xmm5,2 + xor esi,ebp + mov edi,ecx + psrld xmm8,30 + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + por xmm5,xmm8 + add ebx,ecx + add eax,DWORD PTR[28+rsp] + pshufd xmm9,xmm4,238 + ror ecx,7 + mov esi,ebx + xor edi,edx + rol ebx,5 + add eax,edi + xor esi,ecx + xor ecx,edx + add eax,ebx + pxor xmm6,xmm2 + add ebp,DWORD PTR[32+rsp] + and esi,ecx + xor ecx,edx + ror ebx,7 + punpcklqdq xmm9,xmm5 + mov edi,eax + xor esi,ecx + pxor xmm6,xmm7 + rol eax,5 + add ebp,esi + movdqa xmm8,xmm10 + xor edi,ebx + paddd xmm10,xmm5 + xor ebx,ecx + pxor xmm6,xmm9 + add ebp,eax + add edx,DWORD PTR[36+rsp] + and edi,ebx + xor ebx,ecx + ror eax,7 + movdqa xmm9,xmm6 + mov esi,ebp + xor edi,ebx + movdqa XMMWORD PTR[16+rsp],xmm10 + rol ebp,5 + add edx,edi + xor esi,eax + pslld xmm6,2 + xor eax,ebx + add edx,ebp + psrld xmm9,30 + add ecx,DWORD PTR[40+rsp] + and esi,eax + xor eax,ebx + por xmm6,xmm9 + ror ebp,7 + mov edi,edx + xor esi,eax + rol edx,5 + pshufd xmm10,xmm5,238 + add ecx,esi + xor edi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD PTR[44+rsp] + and edi,ebp + xor ebp,eax + ror edx,7 + mov esi,ecx + xor edi,ebp + rol ecx,5 + add ebx,edi + xor esi,edx + xor edx,ebp + 
add ebx,ecx + pxor xmm7,xmm3 + add eax,DWORD PTR[48+rsp] + and esi,edx + xor edx,ebp + ror ecx,7 + punpcklqdq xmm10,xmm6 + mov edi,ebx + xor esi,edx + pxor xmm7,xmm0 + rol ebx,5 + add eax,esi + movdqa xmm9,XMMWORD PTR[32+r11] + xor edi,ecx + paddd xmm8,xmm6 + xor ecx,edx + pxor xmm7,xmm10 + add eax,ebx + add ebp,DWORD PTR[52+rsp] + and edi,ecx + xor ecx,edx + ror ebx,7 + movdqa xmm10,xmm7 + mov esi,eax + xor edi,ecx + movdqa XMMWORD PTR[32+rsp],xmm8 + rol eax,5 + add ebp,edi + xor esi,ebx + pslld xmm7,2 + xor ebx,ecx + add ebp,eax + psrld xmm10,30 + add edx,DWORD PTR[56+rsp] + and esi,ebx + xor ebx,ecx + por xmm7,xmm10 + ror eax,7 + mov edi,ebp + xor esi,ebx + rol ebp,5 + pshufd xmm8,xmm6,238 + add edx,esi + xor edi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD PTR[60+rsp] + and edi,eax + xor eax,ebx + ror ebp,7 + mov esi,edx + xor edi,eax + rol edx,5 + add ecx,edi + xor esi,ebp + xor ebp,eax + add ecx,edx + pxor xmm0,xmm4 + add ebx,DWORD PTR[rsp] + and esi,ebp + xor ebp,eax + ror edx,7 + punpcklqdq xmm8,xmm7 + mov edi,ecx + xor esi,ebp + pxor xmm0,xmm1 + rol ecx,5 + add ebx,esi + movdqa xmm10,xmm9 + xor edi,edx + paddd xmm9,xmm7 + xor edx,ebp + pxor xmm0,xmm8 + add ebx,ecx + add eax,DWORD PTR[4+rsp] + and edi,edx + xor edx,ebp + ror ecx,7 + movdqa xmm8,xmm0 + mov esi,ebx + xor edi,edx + movdqa XMMWORD PTR[48+rsp],xmm9 + rol ebx,5 + add eax,edi + xor esi,ecx + pslld xmm0,2 + xor ecx,edx + add eax,ebx + psrld xmm8,30 + add ebp,DWORD PTR[8+rsp] + and esi,ecx + xor ecx,edx + por xmm0,xmm8 + ror ebx,7 + mov edi,eax + xor esi,ecx + rol eax,5 + pshufd xmm9,xmm7,238 + add ebp,esi + xor edi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD PTR[12+rsp] + and edi,ebx + xor ebx,ecx + ror eax,7 + mov esi,ebp + xor edi,ebx + rol ebp,5 + add edx,edi + xor esi,eax + xor eax,ebx + add edx,ebp + pxor xmm1,xmm5 + add ecx,DWORD PTR[16+rsp] + and esi,eax + xor eax,ebx + ror ebp,7 + punpcklqdq xmm9,xmm0 + mov edi,edx + xor esi,eax + pxor xmm1,xmm2 + rol edx,5 + add ecx,esi + movdqa 
xmm8,xmm10 + xor edi,ebp + paddd xmm10,xmm0 + xor ebp,eax + pxor xmm1,xmm9 + add ecx,edx + add ebx,DWORD PTR[20+rsp] + and edi,ebp + xor ebp,eax + ror edx,7 + movdqa xmm9,xmm1 + mov esi,ecx + xor edi,ebp + movdqa XMMWORD PTR[rsp],xmm10 + rol ecx,5 + add ebx,edi + xor esi,edx + pslld xmm1,2 + xor edx,ebp + add ebx,ecx + psrld xmm9,30 + add eax,DWORD PTR[24+rsp] + and esi,edx + xor edx,ebp + por xmm1,xmm9 + ror ecx,7 + mov edi,ebx + xor esi,edx + rol ebx,5 + pshufd xmm10,xmm0,238 + add eax,esi + xor edi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD PTR[28+rsp] + and edi,ecx + xor ecx,edx + ror ebx,7 + mov esi,eax + xor edi,ecx + rol eax,5 + add ebp,edi + xor esi,ebx + xor ebx,ecx + add ebp,eax + pxor xmm2,xmm6 + add edx,DWORD PTR[32+rsp] + and esi,ebx + xor ebx,ecx + ror eax,7 + punpcklqdq xmm10,xmm1 + mov edi,ebp + xor esi,ebx + pxor xmm2,xmm3 + rol ebp,5 + add edx,esi + movdqa xmm9,xmm8 + xor edi,eax + paddd xmm8,xmm1 + xor eax,ebx + pxor xmm2,xmm10 + add edx,ebp + add ecx,DWORD PTR[36+rsp] + and edi,eax + xor eax,ebx + ror ebp,7 + movdqa xmm10,xmm2 + mov esi,edx + xor edi,eax + movdqa XMMWORD PTR[16+rsp],xmm8 + rol edx,5 + add ecx,edi + xor esi,ebp + pslld xmm2,2 + xor ebp,eax + add ecx,edx + psrld xmm10,30 + add ebx,DWORD PTR[40+rsp] + and esi,ebp + xor ebp,eax + por xmm2,xmm10 + ror edx,7 + mov edi,ecx + xor esi,ebp + rol ecx,5 + pshufd xmm8,xmm1,238 + add ebx,esi + xor edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD PTR[44+rsp] + and edi,edx + xor edx,ebp + ror ecx,7 + mov esi,ebx + xor edi,edx + rol ebx,5 + add eax,edi + xor esi,edx + add eax,ebx + pxor xmm3,xmm7 + add ebp,DWORD PTR[48+rsp] + xor esi,ecx + punpcklqdq xmm8,xmm2 + mov edi,eax + rol eax,5 + pxor xmm3,xmm4 + add ebp,esi + xor edi,ecx + movdqa xmm10,xmm9 + ror ebx,7 + paddd xmm9,xmm2 + add ebp,eax + pxor xmm3,xmm8 + add edx,DWORD PTR[52+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + movdqa xmm8,xmm3 + add edx,edi + xor esi,ebx + movdqa XMMWORD PTR[32+rsp],xmm9 + ror eax,7 + add edx,ebp + add 
ecx,DWORD PTR[56+rsp] + pslld xmm3,2 + xor esi,eax + mov edi,edx + psrld xmm8,30 + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + por xmm3,xmm8 + add ecx,edx + add ebx,DWORD PTR[60+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + paddd xmm10,xmm3 + add eax,esi + xor edi,edx + movdqa XMMWORD PTR[48+rsp],xmm10 + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[4+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[8+rsp] + xor esi,ebx + mov edi,ebp + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[12+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx + cmp r9,r10 + je $L$done_ssse3 + movdqa xmm6,XMMWORD PTR[64+r11] + movdqa xmm9,XMMWORD PTR[((-64))+r11] + movdqu xmm0,XMMWORD PTR[r9] + movdqu xmm1,XMMWORD PTR[16+r9] + movdqu xmm2,XMMWORD PTR[32+r9] + movdqu xmm3,XMMWORD PTR[48+r9] +DB 102,15,56,0,198 + add r9,64 + add ebx,DWORD PTR[16+rsp] + xor esi,ebp + mov edi,ecx +DB 102,15,56,0,206 + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + paddd xmm0,xmm9 + add ebx,ecx + add eax,DWORD PTR[20+rsp] + xor edi,edx + mov esi,ebx + movdqa XMMWORD PTR[rsp],xmm0 + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + psubd xmm0,xmm9 + add eax,ebx + add ebp,DWORD PTR[24+rsp] + xor esi,ecx + mov edi,eax + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[28+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[32+rsp] + xor esi,eax + mov edi,edx +DB 102,15,56,0,214 + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + paddd xmm1,xmm9 + add ecx,edx + add ebx,DWORD PTR[36+rsp] + xor edi,ebp + mov esi,ecx + movdqa XMMWORD PTR[16+rsp],xmm1 + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + psubd xmm1,xmm9 + add ebx,ecx + 
add eax,DWORD PTR[40+rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[44+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[48+rsp] + xor esi,ebx + mov edi,ebp +DB 102,15,56,0,222 + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + paddd xmm2,xmm9 + add edx,ebp + add ecx,DWORD PTR[52+rsp] + xor edi,eax + mov esi,edx + movdqa XMMWORD PTR[32+rsp],xmm2 + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + psubd xmm2,xmm9 + add ecx,edx + add ebx,DWORD PTR[56+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[60+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + ror ecx,7 + add eax,ebx + add eax,DWORD PTR[r8] + add esi,DWORD PTR[4+r8] + add ecx,DWORD PTR[8+r8] + add edx,DWORD PTR[12+r8] + mov DWORD PTR[r8],eax + add ebp,DWORD PTR[16+r8] + mov DWORD PTR[4+r8],esi + mov ebx,esi + mov DWORD PTR[8+r8],ecx + mov edi,ecx + mov DWORD PTR[12+r8],edx + xor edi,edx + mov DWORD PTR[16+r8],ebp + and esi,edi + jmp $L$oop_ssse3 + +ALIGN 16 +$L$done_ssse3:: + add ebx,DWORD PTR[16+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[20+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[24+rsp] + xor esi,ecx + mov edi,eax + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[28+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[32+rsp] + xor esi,eax + mov edi,edx + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + add ecx,edx + add ebx,DWORD PTR[36+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[40+rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + add eax,esi + xor edi,edx + 
ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[44+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[48+rsp] + xor esi,ebx + mov edi,ebp + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[52+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx + add ebx,DWORD PTR[56+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[60+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + ror ecx,7 + add eax,ebx + add eax,DWORD PTR[r8] + add esi,DWORD PTR[4+r8] + add ecx,DWORD PTR[8+r8] + mov DWORD PTR[r8],eax + add edx,DWORD PTR[12+r8] + mov DWORD PTR[4+r8],esi + add ebp,DWORD PTR[16+r8] + mov DWORD PTR[8+r8],ecx + mov DWORD PTR[12+r8],edx + mov DWORD PTR[16+r8],ebp + movaps xmm6,XMMWORD PTR[((-40-96))+r14] + movaps xmm7,XMMWORD PTR[((-40-80))+r14] + movaps xmm8,XMMWORD PTR[((-40-64))+r14] + movaps xmm9,XMMWORD PTR[((-40-48))+r14] + movaps xmm10,XMMWORD PTR[((-40-32))+r14] + movaps xmm11,XMMWORD PTR[((-40-16))+r14] + lea rsi,QWORD PTR[r14] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$epilogue_ssse3:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_block_data_order_ssse3:: +sha1_block_data_order_ssse3 ENDP +ALIGN 64 +K_XX_XX:: + DD 05a827999h,05a827999h,05a827999h,05a827999h + DD 05a827999h,05a827999h,05a827999h,05a827999h + DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h + DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h + DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch + DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch + DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h + DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h + DD 
000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh +DB 0fh,0eh,0dh,0ch,0bh,0ah,09h,08h,07h,06h,05h,04h,03h,02h,01h,00h +DB 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +DB 102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44 +DB 32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60 +DB 97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114 +DB 103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$prologue] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jae $L$common_seh_tail + + mov rax,QWORD PTR[64+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + + jmp $L$common_seh_tail +se_handler ENDP + +ALIGN 16 +ssse3_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + mov rax,QWORD PTR[232+r8] + + lea rsi,QWORD PTR[((-40-96))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,12 + DD 0a548f3fch + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] 
+	mov	QWORD PTR[144+r8],rbx
+	mov	QWORD PTR[160+r8],rbp
+	mov	QWORD PTR[216+r8],r12
+	mov	QWORD PTR[224+r8],r13
+	mov	QWORD PTR[232+r8],r14
+
+$L$common_seh_tail::
+	mov	rdi,QWORD PTR[8+rax]
+	mov	rsi,QWORD PTR[16+rax]
+	mov	QWORD PTR[152+r8],rax
+	mov	QWORD PTR[168+r8],rsi
+	mov	QWORD PTR[176+r8],rdi
+
+	mov	rdi,QWORD PTR[40+r9]
+	mov	rsi,r8
+	mov	ecx,154
+	DD	0a548f3fch
+
+	mov	rsi,r9
+	xor	rcx,rcx
+	mov	rdx,QWORD PTR[8+rsi]
+	mov	r8,QWORD PTR[rsi]
+	mov	r9,QWORD PTR[16+rsi]
+	mov	r10,QWORD PTR[40+rsi]
+	lea	r11,QWORD PTR[56+rsi]
+	lea	r12,QWORD PTR[24+rsi]
+	mov	QWORD PTR[32+rsp],r10
+	mov	QWORD PTR[40+rsp],r11
+	mov	QWORD PTR[48+rsp],r12
+	mov	QWORD PTR[56+rsp],rcx
+	call	QWORD PTR[__imp_RtlVirtualUnwind]
+
+	mov	eax,1
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+ssse3_handler	ENDP
+
+.text$	ENDS
+.pdata	SEGMENT READONLY ALIGN(4)
+ALIGN	4
+	DD	imagerel $L$SEH_begin_sha1_block_data_order
+	DD	imagerel $L$SEH_end_sha1_block_data_order
+	DD	imagerel $L$SEH_info_sha1_block_data_order
+	DD	imagerel $L$SEH_begin_sha1_block_data_order_ssse3
+	DD	imagerel $L$SEH_end_sha1_block_data_order_ssse3
+	DD	imagerel $L$SEH_info_sha1_block_data_order_ssse3
+.pdata	ENDS
+.xdata	SEGMENT READONLY ALIGN(8)
+ALIGN	8
+$L$SEH_info_sha1_block_data_order::
+DB	9,0,0,0
+	DD	imagerel se_handler
+$L$SEH_info_sha1_block_data_order_ssse3::
+DB	9,0,0,0
+	DD	imagerel ssse3_handler
+	DD	imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3
+
+.xdata	ENDS
+END
diff --git a/win-x86_64/crypto/sha/sha256-x86_64.asm b/win-x86_64/crypto/sha/sha256-x86_64.asm
new file mode 100644
index 0000000..41f2edd
--- /dev/null
+++ b/win-x86_64/crypto/sha/sha256-x86_64.asm
@@ -0,0 +1,2997 @@
+OPTION	DOTNAME
+.text$	SEGMENT ALIGN(256) 'CODE'
+	; ---- sha256_block_data_order: SHA-256 block transform, integer-register path (MASM, Win64 ABI) ----
+EXTERN	OPENSSL_ia32cap_P:NEAR	; capability dwords defined elsewhere; the second dword is tested below
+PUBLIC	sha256_block_data_order
+	; In: rcx = 8-dword hash state (updated in place), rdx = input bytes, r8 = count of 64-byte blocks.
+ALIGN	16
+sha256_block_data_order	PROC PUBLIC	; frame after setup: [rsp..rsp+63] = 16-word schedule ring, [rsp+64..rsp+95] = state ptr, input ptr, input end, saved rsp
+	mov	QWORD PTR[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD PTR[16+rsp],rsi	; rdi/rsi (callee-saved on Win64) are parked at [8+rsp]/[16+rsp] and reloaded in the epilogue
+	mov	rax,rsp	; keep the entry rsp in rax
+$L$SEH_begin_sha256_block_data_order::
+	mov	rdi,rcx	; rdi = state pointer (1st argument)
+	mov	rsi,rdx	; rsi = input pointer (2nd argument)
+	mov	rdx,r8	; rdx = block count (3rd argument)
+
+
+	lea	r11,QWORD PTR[OPENSSL_ia32cap_P]
+	mov	r9d,DWORD PTR[r11]
+	mov	r10d,DWORD PTR[4+r11]
+	mov	r11d,DWORD PTR[8+r11]
+	test	r10d,512	; bit 9 of the second capability dword ...
+	jnz	$L$ssse3_shortcut	; ... diverts into the body of sha256_block_data_order_ssse3
+	push	rbx	; save the six callee-saved registers this path overwrites
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	mov	r11,rsp	; r11 = rsp after the pushes; stored in the frame and used to unwind at the end
+	shl	rdx,4
+	sub	rsp,16*4+4*8	; 64 bytes of schedule buffer + four saved qwords
+	lea	rdx,QWORD PTR[rdx*4+rsi]	; rdx = input + 64*blocks = end of input
+	and	rsp,-64	; align the frame down to a 64-byte boundary
+	mov	QWORD PTR[((64+0))+rsp],rdi	; frame: state pointer
+	mov	QWORD PTR[((64+8))+rsp],rsi	; frame: input pointer
+	mov	QWORD PTR[((64+16))+rsp],rdx	; frame: end-of-input pointer
+	mov	QWORD PTR[((64+24))+rsp],r11	; frame: rsp to restore from
+$L$prologue::
+	; load state words 0..7 into eax,ebx,ecx,edx,r8d,r9d,r10d,r11d (working variables a..h)
+	mov	eax,DWORD PTR[rdi]
+	mov	ebx,DWORD PTR[4+rdi]
+	mov	ecx,DWORD PTR[8+rdi]
+	mov	edx,DWORD PTR[12+rdi]
+	mov	r8d,DWORD PTR[16+rdi]
+	mov	r9d,DWORD PTR[20+rdi]
+	mov	r10d,DWORD PTR[24+rdi]
+	mov	r11d,DWORD PTR[28+rdi]
+	jmp	$L$loop
+
+ALIGN	16
+$L$loop::	; one iteration per 64-byte block
+	mov	edi,ebx
+	lea	rbp,QWORD PTR[K256]	; rbp walks the round-constant table
+	xor	edi,ecx	; edi = b^c, seed for the Maj computation
+	mov	r12d,DWORD PTR[rsi]	; round 0: fetch message word 0
+	mov	r13d,r8d	; r13d accumulates Sigma1(e)
+	mov	r14d,eax	; r14d accumulates Sigma0(a)
+	bswap	r12d	; byte-swap: message words are taken as big-endian
+	ror	r13d,14
+	mov	r15d,r9d	; r15d = f, start of Ch(e,f,g)
+
+	xor	r13d,r8d
+	ror	r14d,9
+	xor	r15d,r10d	; f^g
+
+	mov	DWORD PTR[rsp],r12d	; keep the word in the 16-entry schedule buffer
+	xor	r14d,eax
+	and	r15d,r8d	; (f^g)&e
+
+	ror	r13d,5
+	add	r12d,r11d	; T1 = W + h
+	xor	r15d,r10d	; Ch(e,f,g) = ((f^g)&e)^g
+
+	ror	r14d,11
+	xor	r13d,r8d
+	add	r12d,r15d	; T1 += Ch
+
+	mov	r15d,eax
+	add	r12d,DWORD PTR[rbp]	; T1 += round constant
+	xor	r14d,eax
+
+	xor	r15d,ebx	; r15d = a^b (serves as b^c in the next round)
+	ror	r13d,6	; r13d = Sigma1(e) = ror6 ^ ror11 ^ ror25
+	mov	r11d,ebx
+
+	and	edi,r15d	; (b^c)&(a^b)
+	ror	r14d,2	; r14d = Sigma0(a) = ror2 ^ ror13 ^ ror22
+	add	r12d,r13d	; T1 += Sigma1(e)
+
+	xor	r11d,edi	; Maj(a,b,c) = ((a^b)&(b^c))^b
+	add	edx,r12d	; d += T1 (next round's e)
+	add	r11d,r12d	; h = Maj + T1 (next round's a, Sigma0 still pending)
+
+	lea	rbp,QWORD PTR[4+rbp]	; next round constant
+	add	r11d,r14d	; add the pending Sigma0 to complete the new a
+	mov	r12d,DWORD PTR[4+rsi]	; round 1 (same pattern, register roles rotated by one)
+	mov	r13d,edx
+	mov	r14d,r11d
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,r8d
+
+	xor	r13d,edx
+	ror	r14d,9
+	xor	edi,r9d
+
+	mov	DWORD PTR[4+rsp],r12d
+	xor	r14d,r11d
+	and	edi,edx
+
+	ror	r13d,5
+	add	r12d,r10d
+	xor	edi,r9d
+
+	ror	r14d,11
+	xor	r13d,edx
+	add	r12d,edi
+
+	mov	edi,r11d
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,r11d
+
+	xor	edi,eax
+	ror	r13d,6
+	mov	r10d,eax
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r10d,r15d
+	add	ecx,r12d
+	add	r10d,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	add	r10d,r14d
+	mov	r12d,DWORD PTR[8+rsi]	; round 2
+	mov	r13d,ecx
+	mov	r14d,r10d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,edx
+
+	xor	r13d,ecx
+	ror	r14d,9
+	xor	r15d,r8d
+
+	mov	DWORD PTR[8+rsp],r12d
+	xor	r14d,r10d
+	and	r15d,ecx
+
+	ror	r13d,5
+	add	r12d,r9d
+	xor	r15d,r8d
+
+	ror	r14d,11
+	xor	r13d,ecx
+	add	r12d,r15d
+
+	mov	r15d,r10d
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,r10d
+
+	xor	r15d,r11d
+	ror	r13d,6
+	mov	r9d,r11d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r9d,edi
+	add	ebx,r12d
+	add	r9d,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	add	r9d,r14d
+	mov	r12d,DWORD PTR[12+rsi]	; round 3
+	mov	r13d,ebx
+	mov	r14d,r9d
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,ecx
+
+	xor	r13d,ebx
+	ror	r14d,9
+	xor	edi,edx
+
+	mov	DWORD PTR[12+rsp],r12d
+	xor	r14d,r9d
+	and	edi,ebx
+
+	ror	r13d,5
+	add	r12d,r8d
+	xor	edi,edx
+
+	ror	r14d,11
+	xor	r13d,ebx
+	add	r12d,edi
+
+	mov	edi,r9d
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,r9d
+
+	xor	edi,r10d
+	ror	r13d,6
+	mov	r8d,r10d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r8d,r15d
+	add	eax,r12d
+	add	r8d,r12d
+
+	lea	rbp,QWORD PTR[20+rbp]	; 4 + 16: every K256 row is stored twice, so step over the duplicate row
+	add	r8d,r14d
+	mov	r12d,DWORD PTR[16+rsi]	; round 4
+	mov	r13d,eax
+	mov	r14d,r8d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,ebx
+
+	xor	r13d,eax
+	ror	r14d,9
+	xor	r15d,ecx
+
+	mov	DWORD PTR[16+rsp],r12d
+	xor	r14d,r8d
+	and	r15d,eax
+
+	ror	r13d,5
+	add	r12d,edx
+	xor	r15d,ecx
+
+	ror	r14d,11
+	xor	r13d,eax
+	add	r12d,r15d
+
+	mov	r15d,r8d
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,r8d
+
+	xor	r15d,r9d
+	ror	r13d,6
+	mov	edx,r9d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	edx,edi
+	add	r11d,r12d
+	add	edx,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	add	edx,r14d
+	mov	r12d,DWORD PTR[20+rsi]	; round 5
+	mov	r13d,r11d
+	mov	r14d,edx
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,eax
+
+	xor	r13d,r11d
+	ror	r14d,9
+	xor	edi,ebx
+
+	mov	DWORD PTR[20+rsp],r12d
+	xor	r14d,edx
+	and	edi,r11d
+
+	ror	r13d,5
+	add	r12d,ecx
+	xor	edi,ebx
+
+	ror	r14d,11
+	xor	r13d,r11d
+	add	r12d,edi
+
+	mov	edi,edx
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,edx
+
+	xor	edi,r8d
+	ror	r13d,6
+	mov	ecx,r8d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ecx,r15d
+	add	r10d,r12d
+	add	ecx,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	add	ecx,r14d
+	mov	r12d,DWORD PTR[24+rsi]	; round 6
+	mov	r13d,r10d
+	mov	r14d,ecx
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r11d
+
+	xor	r13d,r10d
+	ror	r14d,9
+	xor	r15d,eax
+
+	mov	DWORD PTR[24+rsp],r12d
+	xor	r14d,ecx
+	and	r15d,r10d
+
+	ror	r13d,5
+	add	r12d,ebx
+	xor	r15d,eax
+
+	ror	r14d,11
+	xor	r13d,r10d
+	add	r12d,r15d
+
+	mov	r15d,ecx
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,ecx
+
+	xor	r15d,edx
+	ror	r13d,6
+	mov	ebx,edx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ebx,edi
+	add	r9d,r12d
+	add	ebx,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	add	ebx,r14d
+	mov	r12d,DWORD PTR[28+rsi]	; round 7
+	mov	r13d,r9d
+	mov	r14d,ebx
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,r10d
+
+	xor	r13d,r9d
+	ror	r14d,9
+	xor	edi,r11d
+
+	mov	DWORD PTR[28+rsp],r12d
+	xor	r14d,ebx
+	and	edi,r9d
+
+	ror	r13d,5
+	add	r12d,eax
+	xor	edi,r11d
+
+	ror	r14d,11
+	xor	r13d,r9d
+	add	r12d,edi
+
+	mov	edi,ebx
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,ebx
+
+	xor	edi,ecx
+	ror	r13d,6
+	mov	eax,ecx
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	eax,r15d
+	add	r8d,r12d
+	add	eax,r12d
+
+	lea	rbp,QWORD PTR[20+rbp]
+	add	eax,r14d
+	mov	r12d,DWORD PTR[32+rsi]	; round 8 (register roles are back to the round-0 assignment)
+	mov	r13d,r8d
+	mov	r14d,eax
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r9d
+
+	xor	r13d,r8d
+	ror	r14d,9
+	xor	r15d,r10d
+
+	mov	DWORD PTR[32+rsp],r12d
+	xor	r14d,eax
+	and	r15d,r8d
+
+	ror	r13d,5
+	add	r12d,r11d
+	xor	r15d,r10d
+
+	ror	r14d,11
+	xor	r13d,r8d
+	add	r12d,r15d
+
+	mov	r15d,eax
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,eax
+
+	xor	r15d,ebx
+	ror	r13d,6
+	mov	r11d,ebx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r11d,edi
+	add	edx,r12d
+	add	r11d,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	add	r11d,r14d
+	mov	r12d,DWORD PTR[36+rsi]	; round 9
+	mov	r13d,edx
+	mov	r14d,r11d
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,r8d
+
+	xor	r13d,edx
+	ror	r14d,9
+	xor	edi,r9d
+
+	mov	DWORD PTR[36+rsp],r12d
+	xor	r14d,r11d
+	and	edi,edx
+
+	ror	r13d,5
+	add	r12d,r10d
+	xor	edi,r9d
+
+	ror	r14d,11
+	xor	r13d,edx
+	add	r12d,edi
+
+	mov	edi,r11d
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,r11d
+
+	xor	edi,eax
+	ror	r13d,6
+	mov	r10d,eax
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r10d,r15d
+	add	ecx,r12d
+	add	r10d,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	add	r10d,r14d
+	mov	r12d,DWORD PTR[40+rsi]	; round 10
+	mov	r13d,ecx
+	mov	r14d,r10d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,edx
+
+	xor	r13d,ecx
+	ror	r14d,9
+	xor	r15d,r8d
+
+	mov	DWORD PTR[40+rsp],r12d
+	xor	r14d,r10d
+	and	r15d,ecx
+
+	ror	r13d,5
+	add	r12d,r9d
+	xor	r15d,r8d
+
+	ror	r14d,11
+	xor	r13d,ecx
+	add	r12d,r15d
+
+	mov	r15d,r10d
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,r10d
+
+	xor	r15d,r11d
+	ror	r13d,6
+	mov	r9d,r11d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r9d,edi
+	add	ebx,r12d
+	add	r9d,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	add	r9d,r14d
+	mov	r12d,DWORD PTR[44+rsi]	; round 11
+	mov	r13d,ebx
+	mov	r14d,r9d
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,ecx
+
+	xor	r13d,ebx
+	ror	r14d,9
+	xor	edi,edx
+
+	mov	DWORD PTR[44+rsp],r12d
+	xor	r14d,r9d
+	and	edi,ebx
+
+	ror	r13d,5
+	add	r12d,r8d
+	xor	edi,edx
+
+	ror	r14d,11
+	xor	r13d,ebx
+	add	r12d,edi
+
+	mov	edi,r9d
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,r9d
+
+	xor	edi,r10d
+	ror	r13d,6
+	mov	r8d,r10d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r8d,r15d
+	add	eax,r12d
+	add	r8d,r12d
+
+	lea	rbp,QWORD PTR[20+rbp]
+	add	r8d,r14d
+	mov	r12d,DWORD PTR[48+rsi]	; round 12
+	mov	r13d,eax
+	mov	r14d,r8d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,ebx
+
+	xor	r13d,eax
+	ror	r14d,9
+	xor	r15d,ecx
+
+	mov	DWORD PTR[48+rsp],r12d
+	xor	r14d,r8d
+	and	r15d,eax
+
+	ror	r13d,5
+	add	r12d,edx
+	xor	r15d,ecx
+
+	ror	r14d,11
+	xor	r13d,eax
+	add	r12d,r15d
+
+	mov	r15d,r8d
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,r8d
+
+	xor	r15d,r9d
+	ror	r13d,6
+	mov	edx,r9d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	edx,edi
+	add	r11d,r12d
+	add	edx,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	add	edx,r14d
+	mov	r12d,DWORD PTR[52+rsi]	; round 13
+	mov	r13d,r11d
+	mov	r14d,edx
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,eax
+
+	xor	r13d,r11d
+	ror	r14d,9
+	xor	edi,ebx
+
+	mov	DWORD PTR[52+rsp],r12d
+	xor	r14d,edx
+	and	edi,r11d
+
+	ror	r13d,5
+	add	r12d,ecx
+	xor	edi,ebx
+
+	ror	r14d,11
+	xor	r13d,r11d
+	add	r12d,edi
+
+	mov	edi,edx
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,edx
+
+	xor	edi,r8d
+	ror	r13d,6
+	mov	ecx,r8d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ecx,r15d
+	add	r10d,r12d
+	add	ecx,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	add	ecx,r14d
+	mov	r12d,DWORD PTR[56+rsi]	; round 14
+	mov	r13d,r10d
+	mov	r14d,ecx
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r11d
+
+	xor	r13d,r10d
+	ror	r14d,9
+	xor	r15d,eax
+
+	mov	DWORD PTR[56+rsp],r12d
+	xor	r14d,ecx
+	and	r15d,r10d
+
+	ror	r13d,5
+	add	r12d,ebx
+	xor	r15d,eax
+
+	ror	r14d,11
+	xor	r13d,r10d
+	add	r12d,r15d
+
+	mov	r15d,ecx
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,ecx
+
+	xor	r15d,edx
+	ror	r13d,6
+	mov	ebx,edx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ebx,edi
+	add	r9d,r12d
+	add	ebx,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	add	ebx,r14d
+	mov	r12d,DWORD PTR[60+rsi]	; round 15 (last word taken directly from the input block)
+	mov	r13d,r9d
+	mov	r14d,ebx
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,r10d
+
+	xor	r13d,r9d
+	ror	r14d,9
+	xor	edi,r11d
+
+	mov	DWORD PTR[60+rsp],r12d
+	xor	r14d,ebx
+	and	edi,r9d
+
+	ror	r13d,5
+	add	r12d,eax
+	xor	edi,r11d
+
+	ror	r14d,11
+	xor	r13d,r9d
+	add	r12d,edi
+
+	mov	edi,ebx
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,ebx
+
+	xor	edi,ecx
+	ror	r13d,6
+	mov	eax,ecx
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	eax,r15d
+	add	r8d,r12d
+	add	eax,r12d
+
+	lea	rbp,QWORD PTR[20+rbp]
+	jmp	$L$rounds_16_xx
+ALIGN	16
+$L$rounds_16_xx::	; 16 rounds per pass; each step first regenerates one schedule word X[k] in the ring at [rsp]
+	mov	r13d,DWORD PTR[4+rsp]	; step 0: X[1], input to sigma0
+	mov	r15d,DWORD PTR[56+rsp]	; X[14], input to sigma1
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	eax,r14d	; add the Sigma0 term left pending by the previous round
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7	; r13d = ror7 ^ ror18
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17	; r15d = ror17 ^ ror19
+	xor	r12d,r13d	; sigma0 = ror7 ^ ror18 ^ shr3
+	xor	r15d,r14d	; sigma1 = ror17 ^ ror19 ^ shr10
+	add	r12d,DWORD PTR[36+rsp]	; + X[9]
+
+	add	r12d,DWORD PTR[rsp]	; + X[0]
+	mov	r13d,r8d
+	add	r12d,r15d	; r12d = new schedule word
+	mov	r14d,eax
+	ror	r13d,14
+	mov	r15d,r9d
+
+	xor	r13d,r8d
+	ror	r14d,9
+	xor	r15d,r10d
+
+	mov	DWORD PTR[rsp],r12d	; overwrite X[0] with the new word; the round itself proceeds as in rounds 0..15
+	xor	r14d,eax
+	and	r15d,r8d
+
+	ror	r13d,5
+	add	r12d,r11d
+	xor	r15d,r10d
+
+	ror	r14d,11
+	xor	r13d,r8d
+	add	r12d,r15d
+
+	mov	r15d,eax
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,eax
+
+	xor	r15d,ebx
+	ror	r13d,6
+	mov	r11d,ebx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r11d,edi
+	add	edx,r12d
+	add	r11d,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	mov	r13d,DWORD PTR[8+rsp]	; step 1: X[1] += sigma0(X[2]) + sigma1(X[15]) + X[10]
+	mov	edi,DWORD PTR[60+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r11d,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD PTR[40+rsp]
+
+	add	r12d,DWORD PTR[4+rsp]
+	mov	r13d,edx
+	add	r12d,edi
+	mov	r14d,r11d
+	ror	r13d,14
+	mov	edi,r8d
+
+	xor	r13d,edx
+	ror	r14d,9
+	xor	edi,r9d
+
+	mov	DWORD PTR[4+rsp],r12d
+	xor	r14d,r11d
+	and	edi,edx
+
+	ror	r13d,5
+	add	r12d,r10d
+	xor	edi,r9d
+
+	ror	r14d,11
+	xor	r13d,edx
+	add	r12d,edi
+
+	mov	edi,r11d
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,r11d
+
+	xor	edi,eax
+	ror	r13d,6
+	mov	r10d,eax
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r10d,r15d
+	add	ecx,r12d
+	add	r10d,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	mov	r13d,DWORD PTR[12+rsp]	; step 2: X[2] += sigma0(X[3]) + sigma1(X[0]) + X[11]
+	mov	r15d,DWORD PTR[rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r10d,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD PTR[44+rsp]
+
+	add	r12d,DWORD PTR[8+rsp]
+	mov	r13d,ecx
+	add	r12d,r15d
+	mov	r14d,r10d
+	ror	r13d,14
+	mov	r15d,edx
+
+	xor	r13d,ecx
+	ror	r14d,9
+	xor	r15d,r8d
+
+	mov	DWORD PTR[8+rsp],r12d
+	xor	r14d,r10d
+	and	r15d,ecx
+
+	ror	r13d,5
+	add	r12d,r9d
+	xor	r15d,r8d
+
+	ror	r14d,11
+	xor	r13d,ecx
+	add	r12d,r15d
+
+	mov	r15d,r10d
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,r10d
+
+	xor	r15d,r11d
+	ror	r13d,6
+	mov	r9d,r11d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r9d,edi
+	add	ebx,r12d
+	add	r9d,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	mov	r13d,DWORD PTR[16+rsp]	; step 3: X[3] += sigma0(X[4]) + sigma1(X[1]) + X[12]
+	mov	edi,DWORD PTR[4+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r9d,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD PTR[48+rsp]
+
+	add	r12d,DWORD PTR[12+rsp]
+	mov	r13d,ebx
+	add	r12d,edi
+	mov	r14d,r9d
+	ror	r13d,14
+	mov	edi,ecx
+
+	xor	r13d,ebx
+	ror	r14d,9
+	xor	edi,edx
+
+	mov	DWORD PTR[12+rsp],r12d
+	xor	r14d,r9d
+	and	edi,ebx
+
+	ror	r13d,5
+	add	r12d,r8d
+	xor	edi,edx
+
+	ror	r14d,11
+	xor	r13d,ebx
+	add	r12d,edi
+
+	mov	edi,r9d
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,r9d
+
+	xor	edi,r10d
+	ror	r13d,6
+	mov	r8d,r10d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r8d,r15d
+	add	eax,r12d
+	add	r8d,r12d
+
+	lea	rbp,QWORD PTR[20+rbp]
+	mov	r13d,DWORD PTR[20+rsp]	; step 4: X[4] += sigma0(X[5]) + sigma1(X[2]) + X[13]
+	mov	r15d,DWORD PTR[8+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r8d,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD PTR[52+rsp]
+
+	add	r12d,DWORD PTR[16+rsp]
+	mov	r13d,eax
+	add	r12d,r15d
+	mov	r14d,r8d
+	ror	r13d,14
+	mov	r15d,ebx
+
+	xor	r13d,eax
+	ror	r14d,9
+	xor	r15d,ecx
+
+	mov	DWORD PTR[16+rsp],r12d
+	xor	r14d,r8d
+	and	r15d,eax
+
+	ror	r13d,5
+	add	r12d,edx
+	xor	r15d,ecx
+
+	ror	r14d,11
+	xor	r13d,eax
+	add	r12d,r15d
+
+	mov	r15d,r8d
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,r8d
+
+	xor	r15d,r9d
+	ror	r13d,6
+	mov	edx,r9d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	edx,edi
+	add	r11d,r12d
+	add	edx,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	mov	r13d,DWORD PTR[24+rsp]	; step 5: X[5] += sigma0(X[6]) + sigma1(X[3]) + X[14]
+	mov	edi,DWORD PTR[12+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	edx,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD PTR[56+rsp]
+
+	add	r12d,DWORD PTR[20+rsp]
+	mov	r13d,r11d
+	add	r12d,edi
+	mov	r14d,edx
+	ror	r13d,14
+	mov	edi,eax
+
+	xor	r13d,r11d
+	ror	r14d,9
+	xor	edi,ebx
+
+	mov	DWORD PTR[20+rsp],r12d
+	xor	r14d,edx
+	and	edi,r11d
+
+	ror	r13d,5
+	add	r12d,ecx
+	xor	edi,ebx
+
+	ror	r14d,11
+	xor	r13d,r11d
+	add	r12d,edi
+
+	mov	edi,edx
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,edx
+
+	xor	edi,r8d
+	ror	r13d,6
+	mov	ecx,r8d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ecx,r15d
+	add	r10d,r12d
+	add	ecx,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	mov	r13d,DWORD PTR[28+rsp]	; step 6: X[6] += sigma0(X[7]) + sigma1(X[4]) + X[15]
+	mov	r15d,DWORD PTR[16+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	ecx,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD PTR[60+rsp]
+
+	add	r12d,DWORD PTR[24+rsp]
+	mov	r13d,r10d
+	add	r12d,r15d
+	mov	r14d,ecx
+	ror	r13d,14
+	mov	r15d,r11d
+
+	xor	r13d,r10d
+	ror	r14d,9
+	xor	r15d,eax
+
+	mov	DWORD PTR[24+rsp],r12d
+	xor	r14d,ecx
+	and	r15d,r10d
+
+	ror	r13d,5
+	add	r12d,ebx
+	xor	r15d,eax
+
+	ror	r14d,11
+	xor	r13d,r10d
+	add	r12d,r15d
+
+	mov	r15d,ecx
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,ecx
+
+	xor	r15d,edx
+	ror	r13d,6
+	mov	ebx,edx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ebx,edi
+	add	r9d,r12d
+	add	ebx,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	mov	r13d,DWORD PTR[32+rsp]	; step 7: X[7] += sigma0(X[8]) + sigma1(X[5]) + X[0]
+	mov	edi,DWORD PTR[20+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	ebx,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD PTR[rsp]
+
+	add	r12d,DWORD PTR[28+rsp]
+	mov	r13d,r9d
+	add	r12d,edi
+	mov	r14d,ebx
+	ror	r13d,14
+	mov	edi,r10d
+
+	xor	r13d,r9d
+	ror	r14d,9
+	xor	edi,r11d
+
+	mov	DWORD PTR[28+rsp],r12d
+	xor	r14d,ebx
+	and	edi,r9d
+
+	ror	r13d,5
+	add	r12d,eax
+	xor	edi,r11d
+
+	ror	r14d,11
+	xor	r13d,r9d
+	add	r12d,edi
+
+	mov	edi,ebx
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,ebx
+
+	xor	edi,ecx
+	ror	r13d,6
+	mov	eax,ecx
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	eax,r15d
+	add	r8d,r12d
+	add	eax,r12d
+
+	lea	rbp,QWORD PTR[20+rbp]
+	mov	r13d,DWORD PTR[36+rsp]	; step 8: X[8] += sigma0(X[9]) + sigma1(X[6]) + X[1]
+	mov	r15d,DWORD PTR[24+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	eax,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD PTR[4+rsp]
+
+	add	r12d,DWORD PTR[32+rsp]
+	mov	r13d,r8d
+	add	r12d,r15d
+	mov	r14d,eax
+	ror	r13d,14
+	mov	r15d,r9d
+
+	xor	r13d,r8d
+	ror	r14d,9
+	xor	r15d,r10d
+
+	mov	DWORD PTR[32+rsp],r12d
+	xor	r14d,eax
+	and	r15d,r8d
+
+	ror	r13d,5
+	add	r12d,r11d
+	xor	r15d,r10d
+
+	ror	r14d,11
+	xor	r13d,r8d
+	add	r12d,r15d
+
+	mov	r15d,eax
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,eax
+
+	xor	r15d,ebx
+	ror	r13d,6
+	mov	r11d,ebx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r11d,edi
+	add	edx,r12d
+	add	r11d,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	mov	r13d,DWORD PTR[40+rsp]	; step 9: X[9] += sigma0(X[10]) + sigma1(X[7]) + X[2]
+	mov	edi,DWORD PTR[28+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r11d,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD PTR[8+rsp]
+
+	add	r12d,DWORD PTR[36+rsp]
+	mov	r13d,edx
+	add	r12d,edi
+	mov	r14d,r11d
+	ror	r13d,14
+	mov	edi,r8d
+
+	xor	r13d,edx
+	ror	r14d,9
+	xor	edi,r9d
+
+	mov	DWORD PTR[36+rsp],r12d
+	xor	r14d,r11d
+	and	edi,edx
+
+	ror	r13d,5
+	add	r12d,r10d
+	xor	edi,r9d
+
+	ror	r14d,11
+	xor	r13d,edx
+	add	r12d,edi
+
+	mov	edi,r11d
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,r11d
+
+	xor	edi,eax
+	ror	r13d,6
+	mov	r10d,eax
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r10d,r15d
+	add	ecx,r12d
+	add	r10d,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	mov	r13d,DWORD PTR[44+rsp]	; step 10: X[10] += sigma0(X[11]) + sigma1(X[8]) + X[3]
+	mov	r15d,DWORD PTR[32+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r10d,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD PTR[12+rsp]
+
+	add	r12d,DWORD PTR[40+rsp]
+	mov	r13d,ecx
+	add	r12d,r15d
+	mov	r14d,r10d
+	ror	r13d,14
+	mov	r15d,edx
+
+	xor	r13d,ecx
+	ror	r14d,9
+	xor	r15d,r8d
+
+	mov	DWORD PTR[40+rsp],r12d
+	xor	r14d,r10d
+	and	r15d,ecx
+
+	ror	r13d,5
+	add	r12d,r9d
+	xor	r15d,r8d
+
+	ror	r14d,11
+	xor	r13d,ecx
+	add	r12d,r15d
+
+	mov	r15d,r10d
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,r10d
+
+	xor	r15d,r11d
+	ror	r13d,6
+	mov	r9d,r11d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r9d,edi
+	add	ebx,r12d
+	add	r9d,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	mov	r13d,DWORD PTR[48+rsp]	; step 11: X[11] += sigma0(X[12]) + sigma1(X[9]) + X[4]
+	mov	edi,DWORD PTR[36+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r9d,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD PTR[16+rsp]
+
+	add	r12d,DWORD PTR[44+rsp]
+	mov	r13d,ebx
+	add	r12d,edi
+	mov	r14d,r9d
+	ror	r13d,14
+	mov	edi,ecx
+
+	xor	r13d,ebx
+	ror	r14d,9
+	xor	edi,edx
+
+	mov	DWORD PTR[44+rsp],r12d
+	xor	r14d,r9d
+	and	edi,ebx
+
+	ror	r13d,5
+	add	r12d,r8d
+	xor	edi,edx
+
+	ror	r14d,11
+	xor	r13d,ebx
+	add	r12d,edi
+
+	mov	edi,r9d
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,r9d
+
+	xor	edi,r10d
+	ror	r13d,6
+	mov	r8d,r10d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r8d,r15d
+	add	eax,r12d
+	add	r8d,r12d
+
+	lea	rbp,QWORD PTR[20+rbp]
+	mov	r13d,DWORD PTR[52+rsp]	; step 12: X[12] += sigma0(X[13]) + sigma1(X[10]) + X[5]
+	mov	r15d,DWORD PTR[40+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r8d,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD PTR[20+rsp]
+
+	add	r12d,DWORD PTR[48+rsp]
+	mov	r13d,eax
+	add	r12d,r15d
+	mov	r14d,r8d
+	ror	r13d,14
+	mov	r15d,ebx
+
+	xor	r13d,eax
+	ror	r14d,9
+	xor	r15d,ecx
+
+	mov	DWORD PTR[48+rsp],r12d
+	xor	r14d,r8d
+	and	r15d,eax
+
+	ror	r13d,5
+	add	r12d,edx
+	xor	r15d,ecx
+
+	ror	r14d,11
+	xor	r13d,eax
+	add	r12d,r15d
+
+	mov	r15d,r8d
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,r8d
+
+	xor	r15d,r9d
+	ror	r13d,6
+	mov	edx,r9d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	edx,edi
+	add	r11d,r12d
+	add	edx,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	mov	r13d,DWORD PTR[56+rsp]	; step 13: X[13] += sigma0(X[14]) + sigma1(X[11]) + X[6]
+	mov	edi,DWORD PTR[44+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	edx,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD PTR[24+rsp]
+
+	add	r12d,DWORD PTR[52+rsp]
+	mov	r13d,r11d
+	add	r12d,edi
+	mov	r14d,edx
+	ror	r13d,14
+	mov	edi,eax
+
+	xor	r13d,r11d
+	ror	r14d,9
+	xor	edi,ebx
+
+	mov	DWORD PTR[52+rsp],r12d
+	xor	r14d,edx
+	and	edi,r11d
+
+	ror	r13d,5
+	add	r12d,ecx
+	xor	edi,ebx
+
+	ror	r14d,11
+	xor	r13d,r11d
+	add	r12d,edi
+
+	mov	edi,edx
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,edx
+
+	xor	edi,r8d
+	ror	r13d,6
+	mov	ecx,r8d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ecx,r15d
+	add	r10d,r12d
+	add	ecx,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	mov	r13d,DWORD PTR[60+rsp]	; step 14: X[14] += sigma0(X[15]) + sigma1(X[12]) + X[7]
+	mov	r15d,DWORD PTR[48+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	ecx,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD PTR[28+rsp]
+
+	add	r12d,DWORD PTR[56+rsp]
+	mov	r13d,r10d
+	add	r12d,r15d
+	mov	r14d,ecx
+	ror	r13d,14
+	mov	r15d,r11d
+
+	xor	r13d,r10d
+	ror	r14d,9
+	xor	r15d,eax
+
+	mov	DWORD PTR[56+rsp],r12d
+	xor	r14d,ecx
+	and	r15d,r10d
+
+	ror	r13d,5
+	add	r12d,ebx
+	xor	r15d,eax
+
+	ror	r14d,11
+	xor	r13d,r10d
+	add	r12d,r15d
+
+	mov	r15d,ecx
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,ecx
+
+	xor	r15d,edx
+	ror	r13d,6
+	mov	ebx,edx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ebx,edi
+	add	r9d,r12d
+	add	ebx,r12d
+
+	lea	rbp,QWORD PTR[4+rbp]
+	mov	r13d,DWORD PTR[rsp]	; step 15: X[15] += sigma0(X[0]) + sigma1(X[13]) + X[8]
+	mov	edi,DWORD PTR[52+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	ebx,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD PTR[32+rsp]
+
+	add	r12d,DWORD PTR[60+rsp]
+	mov	r13d,r9d
+	add	r12d,edi
+	mov	r14d,ebx
+	ror	r13d,14
+	mov	edi,r10d
+
+	xor	r13d,r9d
+	ror	r14d,9
+	xor	edi,r11d
+
+	mov	DWORD PTR[60+rsp],r12d
+	xor	r14d,ebx
+	and	edi,r9d
+
+	ror	r13d,5
+	add	r12d,eax
+	xor	edi,r11d
+
+	ror	r14d,11
+	xor	r13d,r9d
+	add	r12d,edi
+
+	mov	edi,ebx
+	add	r12d,DWORD PTR[rbp]
+	xor	r14d,ebx
+
+	xor	edi,ecx
+	ror	r13d,6
+	mov	eax,ecx
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	eax,r15d
+	add	r8d,r12d
+	add	eax,r12d
+
+	lea	rbp,QWORD PTR[20+rbp]
+	cmp	BYTE PTR[3+rbp],0	; most significant byte of the next table dword: non-zero at each constant tested here, zero at the 000010203h entry that follows the constants
+	jnz	$L$rounds_16_xx
+
+	mov	rdi,QWORD PTR[((64+0))+rsp]	; reload the state pointer (edi was used as scratch in the rounds)
+	add	eax,r14d	; Sigma0 term still pending from the final round
+	lea	rsi,QWORD PTR[64+rsi]	; advance to the next 64-byte block
+	; add the previous state into the working variables
+	add	eax,DWORD PTR[rdi]
+	add	ebx,DWORD PTR[4+rdi]
+	add	ecx,DWORD PTR[8+rdi]
+	add	edx,DWORD PTR[12+rdi]
+	add	r8d,DWORD PTR[16+rdi]
+	add	r9d,DWORD PTR[20+rdi]
+	add	r10d,DWORD PTR[24+rdi]
+	add	r11d,DWORD PTR[28+rdi]
+
+	cmp	rsi,QWORD PTR[((64+16))+rsp]	; compare against end of input; the mov stores below leave the flags for jb
+	; write the updated state back
+	mov	DWORD PTR[rdi],eax
+	mov	DWORD PTR[4+rdi],ebx
+	mov	DWORD PTR[8+rdi],ecx
+	mov	DWORD PTR[12+rdi],edx
+	mov	DWORD PTR[16+rdi],r8d
+	mov	DWORD PTR[20+rdi],r9d
+	mov	DWORD PTR[24+rdi],r10d
+	mov	DWORD PTR[28+rdi],r11d
+	jb	$L$loop	; NOTE(review): loop is tested at the bottom, so one block is processed even for a count of 0 - presumably callers never pass 0; confirm
+
+	mov	rsi,QWORD PTR[((64+24))+rsp]	; rsi = rsp value recorded right after the six pushes
+	mov	r15,QWORD PTR[rsi]	; restore callee-saved registers in reverse push order
+	mov	r14,QWORD PTR[8+rsi]
+	mov	r13,QWORD PTR[16+rsi]
+	mov	r12,QWORD PTR[24+rsi]
+	mov	rbp,QWORD PTR[32+rsi]
+	mov	rbx,QWORD PTR[40+rsi]
+	lea	rsp,QWORD PTR[48+rsi]	; rsp back to its value at function entry
+$L$epilogue::
+	mov	rdi,QWORD PTR[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD PTR[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_sha256_block_data_order::
+sha256_block_data_order	ENDP
+ALIGN	64
+	; round-constant table; every row of four dwords is stored twice
+K256::
+	DD	0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h
+	DD	0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h
+	DD	03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h
+	DD	03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h
+	DD	0d807aa98h,012835b01h,0243185beh,0550c7dc3h
+	DD	0d807aa98h,012835b01h,0243185beh,0550c7dc3h
+	DD	072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h
+	DD	072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h
+	DD
0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch + DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch + DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah + DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah + DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h + DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h + DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h + DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h + DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h + DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h + DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h + DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h + DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h + DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h + DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h + DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h + DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h + DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h + DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h + DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h + DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h + DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h + DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h + DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h + + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 003020100h,00b0a0908h,0ffffffffh,0ffffffffh + DD 003020100h,00b0a0908h,0ffffffffh,0ffffffffh + DD 0ffffffffh,0ffffffffh,003020100h,00b0a0908h + DD 0ffffffffh,0ffffffffh,003020100h,00b0a0908h +DB 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54 +DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +DB 111,114,103,62,0 + +ALIGN 64 +sha256_block_data_order_ssse3 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order_ssse3:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +$L$ssse3_shortcut:: + push rbx + push rbp + 
push r12 + push r13 + push r14 + push r15 + mov r11,rsp + shl rdx,4 + sub rsp,160 + lea rdx,QWORD PTR[rdx*4+rsi] + and rsp,-64 + mov QWORD PTR[((64+0))+rsp],rdi + mov QWORD PTR[((64+8))+rsp],rsi + mov QWORD PTR[((64+16))+rsp],rdx + mov QWORD PTR[((64+24))+rsp],r11 + movaps XMMWORD PTR[(64+32)+rsp],xmm6 + movaps XMMWORD PTR[(64+48)+rsp],xmm7 + movaps XMMWORD PTR[(64+64)+rsp],xmm8 + movaps XMMWORD PTR[(64+80)+rsp],xmm9 +$L$prologue_ssse3:: + + mov eax,DWORD PTR[rdi] + mov ebx,DWORD PTR[4+rdi] + mov ecx,DWORD PTR[8+rdi] + mov edx,DWORD PTR[12+rdi] + mov r8d,DWORD PTR[16+rdi] + mov r9d,DWORD PTR[20+rdi] + mov r10d,DWORD PTR[24+rdi] + mov r11d,DWORD PTR[28+rdi] + + + jmp $L$loop_ssse3 +ALIGN 16 +$L$loop_ssse3:: + movdqa xmm7,XMMWORD PTR[((K256+512))] + movdqu xmm0,XMMWORD PTR[rsi] + movdqu xmm1,XMMWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[32+rsi] +DB 102,15,56,0,199 + movdqu xmm3,XMMWORD PTR[48+rsi] + lea rbp,QWORD PTR[K256] +DB 102,15,56,0,207 + movdqa xmm4,XMMWORD PTR[rbp] + movdqa xmm5,XMMWORD PTR[32+rbp] +DB 102,15,56,0,215 + paddd xmm4,xmm0 + movdqa xmm6,XMMWORD PTR[64+rbp] +DB 102,15,56,0,223 + movdqa xmm7,XMMWORD PTR[96+rbp] + paddd xmm5,xmm1 + paddd xmm6,xmm2 + paddd xmm7,xmm3 + movdqa XMMWORD PTR[rsp],xmm4 + mov r14d,eax + movdqa XMMWORD PTR[16+rsp],xmm5 + mov edi,ebx + movdqa XMMWORD PTR[32+rsp],xmm6 + xor edi,ecx + movdqa XMMWORD PTR[48+rsp],xmm7 + mov r13d,r8d + jmp $L$ssse3_00_47 + +ALIGN 16 +$L$ssse3_00_47:: + sub rbp,-128 + ror r13d,14 + movdqa xmm4,xmm1 + mov eax,r14d + mov r12d,r9d + movdqa xmm7,xmm3 + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax +DB 102,15,58,15,224,4 + and r12d,r8d + xor r13d,r8d +DB 102,15,58,15,250,4 + add r11d,DWORD PTR[rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,ebx + add r11d,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,eax + add r11d,r13d + xor edi,ebx + paddd xmm0,xmm7 + ror r14d,2 + add edx,r11d + psrld xmm6,7 + add r11d,edi + mov 
r13d,edx + pshufd xmm7,xmm3,250 + add r14d,r11d + ror r13d,14 + pslld xmm5,14 + mov r11d,r14d + mov r12d,r8d + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + psrld xmm6,11 + xor r14d,r11d + pxor xmm4,xmm5 + and r12d,edx + xor r13d,edx + pslld xmm5,11 + add r10d,DWORD PTR[4+rsp] + mov edi,r11d + pxor xmm4,xmm6 + xor r12d,r9d + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,eax + add r10d,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,r11d + psrld xmm7,10 + add r10d,r13d + xor r15d,eax + paddd xmm0,xmm4 + ror r14d,2 + add ecx,r10d + psrlq xmm6,17 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + pxor xmm7,xmm6 + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + psrlq xmm6,2 + xor r13d,ecx + xor r12d,r8d + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,r10d + and r12d,ecx + pshufd xmm7,xmm7,128 + xor r13d,ecx + add r9d,DWORD PTR[8+rsp] + mov r15d,r10d + psrldq xmm7,8 + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + paddd xmm0,xmm7 + and edi,r15d + xor r14d,r10d + add r9d,r13d + pshufd xmm7,xmm0,80 + xor edi,r11d + ror r14d,2 + add ebx,r9d + movdqa xmm6,xmm7 + add r9d,edi + mov r13d,ebx + psrld xmm7,10 + add r14d,r9d + ror r13d,14 + psrlq xmm6,17 + mov r9d,r14d + mov r12d,ecx + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + psrlq xmm6,2 + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[12+rsp] + pxor xmm7,xmm6 + mov edi,r9d + xor r12d,edx + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,r10d + add r8d,r12d + movdqa xmm6,XMMWORD PTR[rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + paddd xmm0,xmm7 + ror r14d,2 + add eax,r8d + add r8d,r15d + paddd xmm6,xmm0 + mov r13d,eax + add r14d,r8d + movdqa XMMWORD PTR[rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm2 + mov r8d,r14d + mov r12d,ebx + movdqa xmm7,xmm0 + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d +DB 102,15,58,15,225,4 + and r12d,eax + xor r13d,eax +DB 
102,15,58,15,251,4 + add edx,DWORD PTR[16+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,r9d + add edx,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,r8d + add edx,r13d + xor edi,r9d + paddd xmm1,xmm7 + ror r14d,2 + add r11d,edx + psrld xmm6,7 + add edx,edi + mov r13d,r11d + pshufd xmm7,xmm0,250 + add r14d,edx + ror r13d,14 + pslld xmm5,14 + mov edx,r14d + mov r12d,eax + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + psrld xmm6,11 + xor r14d,edx + pxor xmm4,xmm5 + and r12d,r11d + xor r13d,r11d + pslld xmm5,11 + add ecx,DWORD PTR[20+rsp] + mov edi,edx + pxor xmm4,xmm6 + xor r12d,ebx + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,r8d + add ecx,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,edx + psrld xmm7,10 + add ecx,r13d + xor r15d,r8d + paddd xmm1,xmm4 + ror r14d,2 + add r10d,ecx + psrlq xmm6,17 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + pxor xmm7,xmm6 + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + psrlq xmm6,2 + xor r13d,r10d + xor r12d,eax + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,ecx + and r12d,r10d + pshufd xmm7,xmm7,128 + xor r13d,r10d + add ebx,DWORD PTR[24+rsp] + mov r15d,ecx + psrldq xmm7,8 + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + paddd xmm1,xmm7 + and edi,r15d + xor r14d,ecx + add ebx,r13d + pshufd xmm7,xmm1,80 + xor edi,edx + ror r14d,2 + add r9d,ebx + movdqa xmm6,xmm7 + add ebx,edi + mov r13d,r9d + psrld xmm7,10 + add r14d,ebx + ror r13d,14 + psrlq xmm6,17 + mov ebx,r14d + mov r12d,r10d + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + psrlq xmm6,2 + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[28+rsp] + pxor xmm7,xmm6 + mov edi,ebx + xor r12d,r11d + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,ecx + add eax,r12d + movdqa xmm6,XMMWORD PTR[32+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,ebx + add eax,r13d + xor r15d,ecx + paddd xmm1,xmm7 + ror r14d,2 
+ add r8d,eax + add eax,r15d + paddd xmm6,xmm1 + mov r13d,r8d + add r14d,eax + movdqa XMMWORD PTR[16+rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm3 + mov eax,r14d + mov r12d,r9d + movdqa xmm7,xmm1 + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax +DB 102,15,58,15,226,4 + and r12d,r8d + xor r13d,r8d +DB 102,15,58,15,248,4 + add r11d,DWORD PTR[32+rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,ebx + add r11d,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,eax + add r11d,r13d + xor edi,ebx + paddd xmm2,xmm7 + ror r14d,2 + add edx,r11d + psrld xmm6,7 + add r11d,edi + mov r13d,edx + pshufd xmm7,xmm1,250 + add r14d,r11d + ror r13d,14 + pslld xmm5,14 + mov r11d,r14d + mov r12d,r8d + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + psrld xmm6,11 + xor r14d,r11d + pxor xmm4,xmm5 + and r12d,edx + xor r13d,edx + pslld xmm5,11 + add r10d,DWORD PTR[36+rsp] + mov edi,r11d + pxor xmm4,xmm6 + xor r12d,r9d + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,eax + add r10d,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,r11d + psrld xmm7,10 + add r10d,r13d + xor r15d,eax + paddd xmm2,xmm4 + ror r14d,2 + add ecx,r10d + psrlq xmm6,17 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + pxor xmm7,xmm6 + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + psrlq xmm6,2 + xor r13d,ecx + xor r12d,r8d + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,r10d + and r12d,ecx + pshufd xmm7,xmm7,128 + xor r13d,ecx + add r9d,DWORD PTR[40+rsp] + mov r15d,r10d + psrldq xmm7,8 + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + paddd xmm2,xmm7 + and edi,r15d + xor r14d,r10d + add r9d,r13d + pshufd xmm7,xmm2,80 + xor edi,r11d + ror r14d,2 + add ebx,r9d + movdqa xmm6,xmm7 + add r9d,edi + mov r13d,ebx + psrld xmm7,10 + add r14d,r9d + ror r13d,14 + psrlq xmm6,17 + mov r9d,r14d + mov r12d,ecx + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + 
psrlq xmm6,2 + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[44+rsp] + pxor xmm7,xmm6 + mov edi,r9d + xor r12d,edx + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,r10d + add r8d,r12d + movdqa xmm6,XMMWORD PTR[64+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + paddd xmm2,xmm7 + ror r14d,2 + add eax,r8d + add r8d,r15d + paddd xmm6,xmm2 + mov r13d,eax + add r14d,r8d + movdqa XMMWORD PTR[32+rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm0 + mov r8d,r14d + mov r12d,ebx + movdqa xmm7,xmm2 + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d +DB 102,15,58,15,227,4 + and r12d,eax + xor r13d,eax +DB 102,15,58,15,249,4 + add edx,DWORD PTR[48+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,r9d + add edx,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,r8d + add edx,r13d + xor edi,r9d + paddd xmm3,xmm7 + ror r14d,2 + add r11d,edx + psrld xmm6,7 + add edx,edi + mov r13d,r11d + pshufd xmm7,xmm2,250 + add r14d,edx + ror r13d,14 + pslld xmm5,14 + mov edx,r14d + mov r12d,eax + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + psrld xmm6,11 + xor r14d,edx + pxor xmm4,xmm5 + and r12d,r11d + xor r13d,r11d + pslld xmm5,11 + add ecx,DWORD PTR[52+rsp] + mov edi,edx + pxor xmm4,xmm6 + xor r12d,ebx + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,r8d + add ecx,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,edx + psrld xmm7,10 + add ecx,r13d + xor r15d,r8d + paddd xmm3,xmm4 + ror r14d,2 + add r10d,ecx + psrlq xmm6,17 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + pxor xmm7,xmm6 + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + psrlq xmm6,2 + xor r13d,r10d + xor r12d,eax + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,ecx + and r12d,r10d + pshufd xmm7,xmm7,128 + xor r13d,r10d + add ebx,DWORD PTR[56+rsp] + mov r15d,ecx + psrldq xmm7,8 + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + paddd xmm3,xmm7 + and edi,r15d + 
xor r14d,ecx + add ebx,r13d + pshufd xmm7,xmm3,80 + xor edi,edx + ror r14d,2 + add r9d,ebx + movdqa xmm6,xmm7 + add ebx,edi + mov r13d,r9d + psrld xmm7,10 + add r14d,ebx + ror r13d,14 + psrlq xmm6,17 + mov ebx,r14d + mov r12d,r10d + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + psrlq xmm6,2 + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[60+rsp] + pxor xmm7,xmm6 + mov edi,ebx + xor r12d,r11d + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,ecx + add eax,r12d + movdqa xmm6,XMMWORD PTR[96+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,ebx + add eax,r13d + xor r15d,ecx + paddd xmm3,xmm7 + ror r14d,2 + add r8d,eax + add eax,r15d + paddd xmm6,xmm3 + mov r13d,r8d + add r14d,eax + movdqa XMMWORD PTR[48+rsp],xmm6 + cmp BYTE PTR[131+rbp],0 + jne $L$ssse3_00_47 + ror r13d,14 + mov eax,r14d + mov r12d,r9d + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD PTR[rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + xor r15d,ebx + add r11d,r12d + ror r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + ror r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + ror r13d,14 + mov r11d,r14d + mov r12d,r8d + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD PTR[4+rsp] + mov edi,r11d + xor r12d,r9d + ror r14d,11 + xor edi,eax + add r10d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + ror r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + xor r13d,ecx + xor r12d,r8d + ror r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD PTR[8+rsp] + mov r15d,r10d + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + ror r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add 
r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[12+rsp] + mov edi,r9d + xor r12d,edx + ror r14d,11 + xor edi,r10d + add r8d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + ror r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + ror r13d,14 + mov r8d,r14d + mov r12d,ebx + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD PTR[16+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + xor r15d,r9d + add edx,r12d + ror r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + ror r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + ror r13d,14 + mov edx,r14d + mov r12d,eax + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD PTR[20+rsp] + mov edi,edx + xor r12d,ebx + ror r14d,11 + xor edi,r8d + add ecx,r12d + ror r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + ror r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + xor r13d,r10d + xor r12d,eax + ror r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD PTR[24+rsp] + mov r15d,ecx + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + ror r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[28+rsp] + mov edi,ebx + xor r12d,r11d + ror r14d,11 + xor edi,ecx + add eax,r12d + ror r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + ror r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + ror r13d,14 + mov 
eax,r14d + mov r12d,r9d + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD PTR[32+rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + xor r15d,ebx + add r11d,r12d + ror r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + ror r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + ror r13d,14 + mov r11d,r14d + mov r12d,r8d + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD PTR[36+rsp] + mov edi,r11d + xor r12d,r9d + ror r14d,11 + xor edi,eax + add r10d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + ror r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + xor r13d,ecx + xor r12d,r8d + ror r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD PTR[40+rsp] + mov r15d,r10d + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + ror r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[44+rsp] + mov edi,r9d + xor r12d,edx + ror r14d,11 + xor edi,r10d + add r8d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + ror r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + ror r13d,14 + mov r8d,r14d + mov r12d,ebx + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD PTR[48+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + xor r15d,r9d + add edx,r12d + ror r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + ror r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + ror r13d,14 + mov edx,r14d + mov 
r12d,eax + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD PTR[52+rsp] + mov edi,edx + xor r12d,ebx + ror r14d,11 + xor edi,r8d + add ecx,r12d + ror r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + ror r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + xor r13d,r10d + xor r12d,eax + ror r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD PTR[56+rsp] + mov r15d,ecx + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + ror r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[60+rsp] + mov edi,ebx + xor r12d,r11d + ror r14d,11 + xor edi,ecx + add eax,r12d + ror r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + ror r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + mov rdi,QWORD PTR[((64+0))+rsp] + mov eax,r14d + + add eax,DWORD PTR[rdi] + lea rsi,QWORD PTR[64+rsi] + add ebx,DWORD PTR[4+rdi] + add ecx,DWORD PTR[8+rdi] + add edx,DWORD PTR[12+rdi] + add r8d,DWORD PTR[16+rdi] + add r9d,DWORD PTR[20+rdi] + add r10d,DWORD PTR[24+rdi] + add r11d,DWORD PTR[28+rdi] + + cmp rsi,QWORD PTR[((64+16))+rsp] + + mov DWORD PTR[rdi],eax + mov DWORD PTR[4+rdi],ebx + mov DWORD PTR[8+rdi],ecx + mov DWORD PTR[12+rdi],edx + mov DWORD PTR[16+rdi],r8d + mov DWORD PTR[20+rdi],r9d + mov DWORD PTR[24+rdi],r10d + mov DWORD PTR[28+rdi],r11d + jb $L$loop_ssse3 + + mov rsi,QWORD PTR[((64+24))+rsp] + movaps xmm6,XMMWORD PTR[((64+32))+rsp] + movaps xmm7,XMMWORD PTR[((64+48))+rsp] + movaps xmm8,XMMWORD PTR[((64+64))+rsp] + movaps xmm9,XMMWORD PTR[((64+80))+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD 
PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue_ssse3:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_block_data_order_ssse3:: +sha256_block_data_order_ssse3 ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + mov rsi,rax + mov rax,QWORD PTR[((64+24))+rax] + lea rax,QWORD PTR[48+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jb $L$in_prologue + + lea rsi,QWORD PTR[((64+32))+rsi] + lea rdi,QWORD PTR[512+r8] + mov ecx,8 + DD 0a548f3fch + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD 
PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_sha256_block_data_order + DD imagerel $L$SEH_end_sha256_block_data_order + DD imagerel $L$SEH_info_sha256_block_data_order + DD imagerel $L$SEH_begin_sha256_block_data_order_ssse3 + DD imagerel $L$SEH_end_sha256_block_data_order_ssse3 + DD imagerel $L$SEH_info_sha256_block_data_order_ssse3 +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_sha256_block_data_order:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$prologue,imagerel $L$epilogue +$L$SEH_info_sha256_block_data_order_ssse3:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3 + +.xdata ENDS +END diff --git a/win-x86_64/crypto/sha/sha512-x86_64.asm b/win-x86_64/crypto/sha/sha512-x86_64.asm new file mode 100644 index 0000000..e993c3c --- /dev/null +++ b/win-x86_64/crypto/sha/sha512-x86_64.asm @@ -0,0 +1,1913 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR +PUBLIC sha512_block_data_order + +ALIGN 16 +sha512_block_data_order PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha512_block_data_order:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + mov r11,rsp + shl rdx,4 + sub rsp,16*8+4*8 + lea rdx,QWORD PTR[rdx*8+rsi] + and rsp,-64 + mov QWORD PTR[((128+0))+rsp],rdi + mov QWORD PTR[((128+8))+rsp],rsi + mov QWORD PTR[((128+16))+rsp],rdx + mov QWORD PTR[((128+24))+rsp],r11 +$L$prologue:: + + mov rax,QWORD PTR[rdi] + mov rbx,QWORD PTR[8+rdi] + mov rcx,QWORD PTR[16+rdi] + mov rdx,QWORD PTR[24+rdi] + mov r8,QWORD PTR[32+rdi] + mov r9,QWORD PTR[40+rdi] + mov r10,QWORD PTR[48+rdi] + mov r11,QWORD PTR[56+rdi] + jmp $L$loop + 
+ALIGN 16 +$L$loop:: + mov rdi,rbx + lea rbp,QWORD PTR[K512] + xor rdi,rcx + mov r12,QWORD PTR[rsi] + mov r13,r8 + mov r14,rax + bswap r12 + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD PTR[rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD PTR[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,QWORD PTR[8+rbp] + add r11,r14 + mov r12,QWORD PTR[8+rsi] + mov r13,rdx + mov r14,r11 + bswap r12 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD PTR[8+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD PTR[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,QWORD PTR[24+rbp] + add r10,r14 + mov r12,QWORD PTR[16+rsi] + mov r13,rcx + mov r14,r10 + bswap r12 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD PTR[16+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD PTR[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,QWORD PTR[8+rbp] + add r9,r14 + mov r12,QWORD PTR[24+rsi] + mov r13,rbx + mov r14,r9 + bswap r12 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD PTR[24+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD PTR[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor 
r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,QWORD PTR[24+rbp] + add r8,r14 + mov r12,QWORD PTR[32+rsi] + mov r13,rax + mov r14,r8 + bswap r12 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD PTR[32+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD PTR[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,QWORD PTR[8+rbp] + add rdx,r14 + mov r12,QWORD PTR[40+rsi] + mov r13,r11 + mov r14,rdx + bswap r12 + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD PTR[40+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD PTR[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,QWORD PTR[24+rbp] + add rcx,r14 + mov r12,QWORD PTR[48+rsi] + mov r13,r10 + mov r14,rcx + bswap r12 + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD PTR[48+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD PTR[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,QWORD PTR[8+rbp] + add rbx,r14 + mov r12,QWORD PTR[56+rsi] + mov r13,r9 + mov r14,rbx + bswap r12 + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD PTR[56+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD PTR[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add 
r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,QWORD PTR[24+rbp] + add rax,r14 + mov r12,QWORD PTR[64+rsi] + mov r13,r8 + mov r14,rax + bswap r12 + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD PTR[64+rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD PTR[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,QWORD PTR[8+rbp] + add r11,r14 + mov r12,QWORD PTR[72+rsi] + mov r13,rdx + mov r14,r11 + bswap r12 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD PTR[72+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD PTR[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,QWORD PTR[24+rbp] + add r10,r14 + mov r12,QWORD PTR[80+rsi] + mov r13,rcx + mov r14,r10 + bswap r12 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD PTR[80+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD PTR[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,QWORD PTR[8+rbp] + add r9,r14 + mov r12,QWORD PTR[88+rsi] + mov r13,rbx + mov r14,r9 + bswap r12 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD PTR[88+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD PTR[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror 
r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,QWORD PTR[24+rbp] + add r8,r14 + mov r12,QWORD PTR[96+rsi] + mov r13,rax + mov r14,r8 + bswap r12 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD PTR[96+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD PTR[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,QWORD PTR[8+rbp] + add rdx,r14 + mov r12,QWORD PTR[104+rsi] + mov r13,r11 + mov r14,rdx + bswap r12 + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD PTR[104+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD PTR[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,QWORD PTR[24+rbp] + add rcx,r14 + mov r12,QWORD PTR[112+rsi] + mov r13,r10 + mov r14,rcx + bswap r12 + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD PTR[112+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD PTR[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,QWORD PTR[8+rbp] + add rbx,r14 + mov r12,QWORD PTR[120+rsi] + mov r13,r9 + mov r14,rbx + bswap r12 + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD PTR[120+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD PTR[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + 
+ and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,QWORD PTR[24+rbp] + jmp $L$rounds_16_xx +ALIGN 16 +$L$rounds_16_xx:: + mov r13,QWORD PTR[8+rsp] + mov r15,QWORD PTR[112+rsp] + + mov r12,r13 + ror r13,7 + add rax,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[72+rsp] + + add r12,QWORD PTR[rsp] + mov r13,r8 + add r12,r15 + mov r14,rax + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD PTR[rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD PTR[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,QWORD PTR[8+rbp] + mov r13,QWORD PTR[16+rsp] + mov rdi,QWORD PTR[120+rsp] + + mov r12,r13 + ror r13,7 + add r11,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[80+rsp] + + add r12,QWORD PTR[8+rsp] + mov r13,rdx + add r12,rdi + mov r14,r11 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD PTR[8+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD PTR[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,QWORD PTR[24+rbp] + mov r13,QWORD PTR[24+rsp] + mov r15,QWORD PTR[rsp] + + mov r12,r13 + ror r13,7 + add r10,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[88+rsp] + + add r12,QWORD PTR[16+rsp] + mov r13,rcx + add r12,r15 + mov r14,r10 
+ ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD PTR[16+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD PTR[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,QWORD PTR[8+rbp] + mov r13,QWORD PTR[32+rsp] + mov rdi,QWORD PTR[8+rsp] + + mov r12,r13 + ror r13,7 + add r9,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[96+rsp] + + add r12,QWORD PTR[24+rsp] + mov r13,rbx + add r12,rdi + mov r14,r9 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD PTR[24+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD PTR[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,QWORD PTR[24+rbp] + mov r13,QWORD PTR[40+rsp] + mov r15,QWORD PTR[16+rsp] + + mov r12,r13 + ror r13,7 + add r8,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[104+rsp] + + add r12,QWORD PTR[32+rsp] + mov r13,rax + add r12,r15 + mov r14,r8 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD PTR[32+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD PTR[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,QWORD PTR[8+rbp] + mov r13,QWORD PTR[48+rsp] + mov rdi,QWORD PTR[24+rsp] + + mov r12,r13 + ror 
r13,7 + add rdx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[112+rsp] + + add r12,QWORD PTR[40+rsp] + mov r13,r11 + add r12,rdi + mov r14,rdx + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD PTR[40+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD PTR[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,QWORD PTR[24+rbp] + mov r13,QWORD PTR[56+rsp] + mov r15,QWORD PTR[32+rsp] + + mov r12,r13 + ror r13,7 + add rcx,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[120+rsp] + + add r12,QWORD PTR[48+rsp] + mov r13,r10 + add r12,r15 + mov r14,rcx + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD PTR[48+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD PTR[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,QWORD PTR[8+rbp] + mov r13,QWORD PTR[64+rsp] + mov rdi,QWORD PTR[40+rsp] + + mov r12,r13 + ror r13,7 + add rbx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[rsp] + + add r12,QWORD PTR[56+rsp] + mov r13,r9 + add r12,rdi + mov r14,rbx + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD PTR[56+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add 
r12,QWORD PTR[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,QWORD PTR[24+rbp] + mov r13,QWORD PTR[72+rsp] + mov r15,QWORD PTR[48+rsp] + + mov r12,r13 + ror r13,7 + add rax,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[8+rsp] + + add r12,QWORD PTR[64+rsp] + mov r13,r8 + add r12,r15 + mov r14,rax + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD PTR[64+rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD PTR[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,QWORD PTR[8+rbp] + mov r13,QWORD PTR[80+rsp] + mov rdi,QWORD PTR[56+rsp] + + mov r12,r13 + ror r13,7 + add r11,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[16+rsp] + + add r12,QWORD PTR[72+rsp] + mov r13,rdx + add r12,rdi + mov r14,r11 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD PTR[72+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD PTR[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,QWORD PTR[24+rbp] + mov r13,QWORD PTR[88+rsp] + mov r15,QWORD PTR[64+rsp] + + mov r12,r13 + ror r13,7 + add r10,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[24+rsp] + + add r12,QWORD PTR[80+rsp] + mov 
r13,rcx + add r12,r15 + mov r14,r10 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD PTR[80+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD PTR[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,QWORD PTR[8+rbp] + mov r13,QWORD PTR[96+rsp] + mov rdi,QWORD PTR[72+rsp] + + mov r12,r13 + ror r13,7 + add r9,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[32+rsp] + + add r12,QWORD PTR[88+rsp] + mov r13,rbx + add r12,rdi + mov r14,r9 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD PTR[88+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD PTR[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,QWORD PTR[24+rbp] + mov r13,QWORD PTR[104+rsp] + mov r15,QWORD PTR[80+rsp] + + mov r12,r13 + ror r13,7 + add r8,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[40+rsp] + + add r12,QWORD PTR[96+rsp] + mov r13,rax + add r12,r15 + mov r14,r8 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD PTR[96+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD PTR[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,QWORD PTR[8+rbp] + mov r13,QWORD PTR[112+rsp] + mov 
rdi,QWORD PTR[88+rsp] + + mov r12,r13 + ror r13,7 + add rdx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[48+rsp] + + add r12,QWORD PTR[104+rsp] + mov r13,r11 + add r12,rdi + mov r14,rdx + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD PTR[104+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD PTR[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,QWORD PTR[24+rbp] + mov r13,QWORD PTR[120+rsp] + mov r15,QWORD PTR[96+rsp] + + mov r12,r13 + ror r13,7 + add rcx,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[56+rsp] + + add r12,QWORD PTR[112+rsp] + mov r13,r10 + add r12,r15 + mov r14,rcx + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD PTR[112+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD PTR[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,QWORD PTR[8+rbp] + mov r13,QWORD PTR[rsp] + mov rdi,QWORD PTR[104+rsp] + + mov r12,r13 + ror r13,7 + add rbx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[64+rsp] + + add r12,QWORD PTR[120+rsp] + mov r13,r9 + add r12,rdi + mov r14,rbx + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD PTR[120+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror 
r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD PTR[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,QWORD PTR[24+rbp] + cmp BYTE PTR[7+rbp],0 + jnz $L$rounds_16_xx + + mov rdi,QWORD PTR[((128+0))+rsp] + add rax,r14 + lea rsi,QWORD PTR[128+rsi] + + add rax,QWORD PTR[rdi] + add rbx,QWORD PTR[8+rdi] + add rcx,QWORD PTR[16+rdi] + add rdx,QWORD PTR[24+rdi] + add r8,QWORD PTR[32+rdi] + add r9,QWORD PTR[40+rdi] + add r10,QWORD PTR[48+rdi] + add r11,QWORD PTR[56+rdi] + + cmp rsi,QWORD PTR[((128+16))+rsp] + + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rbx + mov QWORD PTR[16+rdi],rcx + mov QWORD PTR[24+rdi],rdx + mov QWORD PTR[32+rdi],r8 + mov QWORD PTR[40+rdi],r9 + mov QWORD PTR[48+rdi],r10 + mov QWORD PTR[56+rdi],r11 + jb $L$loop + + mov rsi,QWORD PTR[((128+24))+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha512_block_data_order:: +sha512_block_data_order ENDP +ALIGN 64 + +K512:: + DQ 0428a2f98d728ae22h,07137449123ef65cdh + DQ 0428a2f98d728ae22h,07137449123ef65cdh + DQ 0b5c0fbcfec4d3b2fh,0e9b5dba58189dbbch + DQ 0b5c0fbcfec4d3b2fh,0e9b5dba58189dbbch + DQ 03956c25bf348b538h,059f111f1b605d019h + DQ 03956c25bf348b538h,059f111f1b605d019h + DQ 0923f82a4af194f9bh,0ab1c5ed5da6d8118h + DQ 0923f82a4af194f9bh,0ab1c5ed5da6d8118h + DQ 0d807aa98a3030242h,012835b0145706fbeh + DQ 0d807aa98a3030242h,012835b0145706fbeh + DQ 0243185be4ee4b28ch,0550c7dc3d5ffb4e2h + DQ 0243185be4ee4b28ch,0550c7dc3d5ffb4e2h + DQ 072be5d74f27b896fh,080deb1fe3b1696b1h + DQ 072be5d74f27b896fh,080deb1fe3b1696b1h + DQ 09bdc06a725c71235h,0c19bf174cf692694h + DQ 09bdc06a725c71235h,0c19bf174cf692694h + DQ 
0e49b69c19ef14ad2h,0efbe4786384f25e3h + DQ 0e49b69c19ef14ad2h,0efbe4786384f25e3h + DQ 00fc19dc68b8cd5b5h,0240ca1cc77ac9c65h + DQ 00fc19dc68b8cd5b5h,0240ca1cc77ac9c65h + DQ 02de92c6f592b0275h,04a7484aa6ea6e483h + DQ 02de92c6f592b0275h,04a7484aa6ea6e483h + DQ 05cb0a9dcbd41fbd4h,076f988da831153b5h + DQ 05cb0a9dcbd41fbd4h,076f988da831153b5h + DQ 0983e5152ee66dfabh,0a831c66d2db43210h + DQ 0983e5152ee66dfabh,0a831c66d2db43210h + DQ 0b00327c898fb213fh,0bf597fc7beef0ee4h + DQ 0b00327c898fb213fh,0bf597fc7beef0ee4h + DQ 0c6e00bf33da88fc2h,0d5a79147930aa725h + DQ 0c6e00bf33da88fc2h,0d5a79147930aa725h + DQ 006ca6351e003826fh,0142929670a0e6e70h + DQ 006ca6351e003826fh,0142929670a0e6e70h + DQ 027b70a8546d22ffch,02e1b21385c26c926h + DQ 027b70a8546d22ffch,02e1b21385c26c926h + DQ 04d2c6dfc5ac42aedh,053380d139d95b3dfh + DQ 04d2c6dfc5ac42aedh,053380d139d95b3dfh + DQ 0650a73548baf63deh,0766a0abb3c77b2a8h + DQ 0650a73548baf63deh,0766a0abb3c77b2a8h + DQ 081c2c92e47edaee6h,092722c851482353bh + DQ 081c2c92e47edaee6h,092722c851482353bh + DQ 0a2bfe8a14cf10364h,0a81a664bbc423001h + DQ 0a2bfe8a14cf10364h,0a81a664bbc423001h + DQ 0c24b8b70d0f89791h,0c76c51a30654be30h + DQ 0c24b8b70d0f89791h,0c76c51a30654be30h + DQ 0d192e819d6ef5218h,0d69906245565a910h + DQ 0d192e819d6ef5218h,0d69906245565a910h + DQ 0f40e35855771202ah,0106aa07032bbd1b8h + DQ 0f40e35855771202ah,0106aa07032bbd1b8h + DQ 019a4c116b8d2d0c8h,01e376c085141ab53h + DQ 019a4c116b8d2d0c8h,01e376c085141ab53h + DQ 02748774cdf8eeb99h,034b0bcb5e19b48a8h + DQ 02748774cdf8eeb99h,034b0bcb5e19b48a8h + DQ 0391c0cb3c5c95a63h,04ed8aa4ae3418acbh + DQ 0391c0cb3c5c95a63h,04ed8aa4ae3418acbh + DQ 05b9cca4f7763e373h,0682e6ff3d6b2b8a3h + DQ 05b9cca4f7763e373h,0682e6ff3d6b2b8a3h + DQ 0748f82ee5defb2fch,078a5636f43172f60h + DQ 0748f82ee5defb2fch,078a5636f43172f60h + DQ 084c87814a1f0ab72h,08cc702081a6439ech + DQ 084c87814a1f0ab72h,08cc702081a6439ech + DQ 090befffa23631e28h,0a4506cebde82bde9h + DQ 090befffa23631e28h,0a4506cebde82bde9h + DQ 
0bef9a3f7b2c67915h,0c67178f2e372532bh
+	DQ	0bef9a3f7b2c67915h,0c67178f2e372532bh
+	DQ	0ca273eceea26619ch,0d186b8c721c0c207h
+	DQ	0ca273eceea26619ch,0d186b8c721c0c207h
+	DQ	0eada7dd6cde0eb1eh,0f57d4f7fee6ed178h
+	DQ	0eada7dd6cde0eb1eh,0f57d4f7fee6ed178h
+	DQ	006f067aa72176fbah,00a637dc5a2c898a6h
+	DQ	006f067aa72176fbah,00a637dc5a2c898a6h
+	DQ	0113f9804bef90daeh,01b710b35131c471bh
+	DQ	0113f9804bef90daeh,01b710b35131c471bh
+	DQ	028db77f523047d84h,032caab7b40c72493h
+	DQ	028db77f523047d84h,032caab7b40c72493h
+	DQ	03c9ebe0a15c9bebch,0431d67c49c100d4ch
+	DQ	03c9ebe0a15c9bebch,0431d67c49c100d4ch
+	DQ	04cc5d4becb3e42b6h,0597f299cfc657e2ah
+	DQ	04cc5d4becb3e42b6h,0597f299cfc657e2ah
+	DQ	05fcb6fab3ad6faech,06c44198c4a475817h
+	DQ	05fcb6fab3ad6faech,06c44198c4a475817h
+
+; Byte-index pattern 00..0f, stored twice. No code visible in this part of the
+; file references it; presumably a byte-swap shuffle mask for SIMD code paths
+; of the generator -- TODO confirm.
+	DQ	00001020304050607h,008090a0b0c0d0e0fh
+	DQ	00001020304050607h,008090a0b0c0d0e0fh
+; NUL-terminated ASCII banner:
+; "SHA512 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>"
+DB	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
+DB	110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54
+DB	52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
+DB	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
+DB	111,114,103,62,0
+EXTERN	__imp_RtlVirtualUnwind:NEAR
+
+;-----------------------------------------------------------------------
+; se_handler -- Win64 language-specific exception handler registered in
+; .xdata for sha512_block_data_order.
+;
+; ABI:  Microsoft x64
+; In:   r8 = CONTEXT* of the interrupted code
+;       r9 = DISPATCHER_CONTEXT*
+;       (rcx/rdx, the exception record and establisher frame, are not read)
+; Out:  eax = 1 (ExceptionContinueSearch)
+;
+; The handler rewrites the CONTEXT so that it describes the caller of
+; sha512_block_data_order (Rsp, Rsi, Rdi always; Rbx, Rbp, R12-R15 when
+; the fault lies between the prologue and epilogue labels listed in
+; HandlerData), copies it into disp->ContextRecord and then lets
+; RtlVirtualUnwind continue the unwind.
+;
+; CONTEXT offsets used: Rax 120, Rbx 144, Rsp 152, Rbp 160, Rsi 168,
+; Rdi 176, R12 216 .. R15 240, Rip 248, Xmm6 512.
+; DISPATCHER_CONTEXT offsets used: ControlPc 0, ImageBase 8,
+; FunctionEntry 16, EstablisherFrame 24, ContextRecord 40, HandlerData 56.
+;-----------------------------------------------------------------------
+ALIGN	16
+se_handler	PROC PRIVATE
+	push	rsi			; save every non-volatile GPR we touch
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq				; the cld below changes DF
+	sub	rsp,64			; 32-byte shadow space + 4 stack args;
+					; 9 pushes + 64 keeps rsp 16-aligned
+
+	mov	rax,QWORD PTR[120+r8]	; rax = context->Rax
+	mov	rbx,QWORD PTR[248+r8]	; rbx = context->Rip
+
+	mov	rsi,QWORD PTR[8+r9]	; rsi = disp->ImageBase
+	mov	r11,QWORD PTR[56+r9]	; r11 = disp->HandlerData
+
+	mov	r10d,DWORD PTR[r11]	; HandlerData[0] = RVA of $L$prologue
+	lea	r10,QWORD PTR[r10*1+rsi]
+	cmp	rbx,r10			; Rip < prologue label: frame not set up yet
+	jb	$L$in_prologue
+
+	mov	rax,QWORD PTR[152+r8]	; rax = context->Rsp
+
+	mov	r10d,DWORD PTR[4+r11]	; HandlerData[1] = RVA of $L$epilogue
+	lea	r10,QWORD PTR[r10*1+rsi]
+	cmp	rbx,r10			; Rip >= epilogue label: rsp already restored
+	jae	$L$in_prologue
+	mov	rsi,rax			; keep the in-function Rsp for later
+	mov	rax,QWORD PTR[((128+24))+rax]	; saved stack pointer slot of the frame
+	lea	rax,QWORD PTR[48+rax]	; step over the six pushed registers
+
+	mov	rbx,QWORD PTR[((-8))+rax]	; reload the caller's registers
+	mov	rbp,QWORD PTR[((-16))+rax]
+	mov	r12,QWORD PTR[((-24))+rax]
+	mov	r13,QWORD PTR[((-32))+rax]
+	mov	r14,QWORD PTR[((-40))+rax]
+	mov	r15,QWORD PTR[((-48))+rax]
+	mov	QWORD PTR[144+r8],rbx	; context->Rbx
+	mov	QWORD PTR[160+r8],rbp	; context->Rbp
+	mov	QWORD PTR[216+r8],r12	; context->R12
+	mov	QWORD PTR[224+r8],r13	; context->R13
+	mov	QWORD PTR[232+r8],r14	; context->R14
+	mov	QWORD PTR[240+r8],r15	; context->R15
+
+	; Only code located at or after $L$epilogue may have an XMM save area.
+	; The test has to use the faulting Rip: rbx no longer holds it, because
+	; it was reloaded with the interrupted function's saved rbx just above.
+	; Comparing rbx made the copy below depend on an arbitrary caller value
+	; and could overwrite context->Xmm6.. with unrelated stack contents.
+	lea	r10,QWORD PTR[$L$epilogue]
+	cmp	QWORD PTR[248+r8],r10	; context->Rip < $L$epilogue ?
+	jb	$L$in_prologue		; yes: no XMM registers to restore
+
+	lea	rsi,QWORD PTR[((128+32))+rsi]	; source: frame offset 160
+	lea	rdi,QWORD PTR[512+r8]	; destination: &context->Xmm6
+	mov	ecx,12			; 12 qwords
+	DD	0a548f3fch		; bytes FC F3 48 A5 = cld; rep movsq
+
+$L$in_prologue::
+	mov	rdi,QWORD PTR[8+rax]	; rdi/rsi as stored at 8/16 above the
+	mov	rsi,QWORD PTR[16+rax]	; function's entry rsp
+	mov	QWORD PTR[152+r8],rax	; context->Rsp
+	mov	QWORD PTR[168+r8],rsi	; context->Rsi
+	mov	QWORD PTR[176+r8],rdi	; context->Rdi
+
+	mov	rdi,QWORD PTR[40+r9]	; destination: disp->ContextRecord
+	mov	rsi,r8			; source: the adjusted context
+	mov	ecx,154			; 154 qwords = 1232 bytes
+	DD	0a548f3fch		; cld; rep movsq
+
+	; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
+	;                  ContextRecord, &HandlerData, &EstablisherFrame, NULL)
+	mov	rsi,r9
+	xor	rcx,rcx			; arg1: handler type 0
+	mov	rdx,QWORD PTR[8+rsi]	; arg2: disp->ImageBase
+	mov	r8,QWORD PTR[rsi]	; arg3: disp->ControlPc
+	mov	r9,QWORD PTR[16+rsi]	; arg4: disp->FunctionEntry
+	mov	r10,QWORD PTR[40+rsi]	; disp->ContextRecord
+	lea	r11,QWORD PTR[56+rsi]	; &disp->HandlerData
+	lea	r12,QWORD PTR[24+rsi]	; &disp->EstablisherFrame
+	mov	QWORD PTR[32+rsp],r10	; arg5
+	mov	QWORD PTR[40+rsp],r11	; arg6
+	mov	QWORD PTR[48+rsp],r12	; arg7
+	mov	QWORD PTR[56+rsp],rcx	; arg8 = NULL
+	call	QWORD PTR[__imp_RtlVirtualUnwind]
+
+	mov	eax,1			; ExceptionContinueSearch
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+se_handler	ENDP
+.text$	ENDS
+; RUNTIME_FUNCTION entry: begin RVA, end RVA, unwind-info RVA.
+.pdata	SEGMENT READONLY ALIGN(4)
+ALIGN	4
+	DD	imagerel $L$SEH_begin_sha512_block_data_order
+	DD	imagerel $L$SEH_end_sha512_block_data_order
+	DD	imagerel $L$SEH_info_sha512_block_data_order
+.pdata	ENDS
+; Unwind info: first byte 9 = version 1 with the exception-handler flag,
+; no unwind codes; followed by the handler RVA and the two HandlerData
+; RVAs that se_handler reads as HandlerData[0] and HandlerData[1].
+.xdata	SEGMENT READONLY ALIGN(8)
+ALIGN	8
+$L$SEH_info_sha512_block_data_order::
+DB	9,0,0,0
+	DD	imagerel se_handler
+	DD	imagerel $L$prologue,imagerel $L$epilogue
+
+.xdata	ENDS
+END |