diff options
author | Adam Langley <agl@google.com> | 2015-01-22 14:27:53 -0800 |
---|---|---|
committer | Adam Langley <agl@google.com> | 2015-01-30 16:52:14 -0800 |
commit | d9e397b599b13d642138480a28c14db7a136bf05 (patch) | |
tree | 34bab61dc4ce323b123ad4614dbc07e86ea2f9ef /win-x86_64/crypto/bn | |
download | external_boringssl-d9e397b599b13d642138480a28c14db7a136bf05.zip external_boringssl-d9e397b599b13d642138480a28c14db7a136bf05.tar.gz external_boringssl-d9e397b599b13d642138480a28c14db7a136bf05.tar.bz2 |
Initial commit of BoringSSL for Android.
Diffstat (limited to 'win-x86_64/crypto/bn')
-rw-r--r-- | win-x86_64/crypto/bn/modexp512-x86_64.asm | 1887 | ||||
-rw-r--r-- | win-x86_64/crypto/bn/rsaz-avx2.asm | 29 | ||||
-rw-r--r-- | win-x86_64/crypto/bn/rsaz-x86_64.asm | 1326 | ||||
-rw-r--r-- | win-x86_64/crypto/bn/x86_64-mont.asm | 945 | ||||
-rw-r--r-- | win-x86_64/crypto/bn/x86_64-mont5.asm | 2061 |
5 files changed, 6248 insertions, 0 deletions
diff --git a/win-x86_64/crypto/bn/modexp512-x86_64.asm b/win-x86_64/crypto/bn/modexp512-x86_64.asm new file mode 100644 index 0000000..d3e4a61 --- /dev/null +++ b/win-x86_64/crypto/bn/modexp512-x86_64.asm @@ -0,0 +1,1887 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + + +ALIGN 16 +MULADD_128x512 PROC PRIVATE + mov rax,QWORD PTR[rsi] + mul rbp + add r8,rax + adc rdx,0 + mov QWORD PTR[rcx],r8 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov r8,rdx + mov rbp,QWORD PTR[8+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r9,rax + adc rdx,0 + mov QWORD PTR[8+rcx],r9 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov r9,rdx + DB 0F3h,0C3h ;repret +MULADD_128x512 ENDP + +ALIGN 16 +mont_reduce PROC PRIVATE + lea rdi,QWORD PTR[192+rsp] + mov rsi,QWORD PTR[32+rsp] + add rsi,576 + lea rcx,QWORD PTR[520+rsp] + + mov rbp,QWORD PTR[96+rcx] + mov rax,QWORD PTR[rsi] + mul rbp + mov r8,QWORD PTR[rcx] + add r8,rax + adc rdx,0 + mov QWORD PTR[rdi],r8 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + mov r9,QWORD PTR[8+rcx] + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + mov r10,QWORD PTR[16+rcx] + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + mov r11,QWORD PTR[24+rcx] + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + mov r12,QWORD PTR[32+rcx] + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + mov r13,QWORD PTR[40+rcx] + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + mov r14,QWORD PTR[48+rcx] + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + mov r15,QWORD PTR[56+rcx] + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov r8,rdx + mov rbp,QWORD PTR[104+rcx] + mov rax,QWORD PTR[rsi] + mul rbp + add r9,rax + adc rdx,0 + mov QWORD PTR[8+rdi],r9 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov r9,rdx + mov rbp,QWORD PTR[112+rcx] + mov rax,QWORD PTR[rsi] + mul rbp + add r10,rax + adc rdx,0 + mov QWORD PTR[16+rdi],r10 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov r10,rdx + mov rbp,QWORD PTR[120+rcx] + mov rax,QWORD PTR[rsi] + mul rbp + add r11,rax + adc rdx,0 + mov QWORD PTR[24+rdi],r11 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov r11,rdx + xor rax,rax + + add r8,QWORD PTR[64+rcx] + adc r9,QWORD PTR[72+rcx] + adc r10,QWORD PTR[80+rcx] + adc r11,QWORD PTR[88+rcx] + adc rax,0 + + + + + mov QWORD PTR[64+rdi],r8 + mov QWORD PTR[72+rdi],r9 + mov rbp,r10 + mov QWORD PTR[88+rdi],r11 + + mov QWORD PTR[384+rsp],rax + + mov r8,QWORD PTR[rdi] + mov r9,QWORD PTR[8+rdi] + mov r10,QWORD PTR[16+rdi] + mov r11,QWORD PTR[24+rdi] + + + + + + + + + add rdi,8*10 + + add rsi,64 + lea rcx,QWORD PTR[296+rsp] + + call MULADD_128x512 + + mov rax,QWORD PTR[384+rsp] + + + add r8,QWORD PTR[((-16))+rdi] + adc r9,QWORD PTR[((-8))+rdi] + mov QWORD PTR[64+rcx],r8 + mov QWORD PTR[72+rcx],r9 + + adc rax,rax + mov QWORD PTR[384+rsp],rax + + lea rdi,QWORD PTR[192+rsp] + add rsi,64 + + + + + + mov r8,QWORD PTR[rsi] + mov rbx,QWORD PTR[8+rsi] + + mov rax,QWORD PTR[rcx] + mul r8 + mov rbp,rax + mov r9,rdx + + mov rax,QWORD PTR[8+rcx] + mul r8 + add r9,rax + + mov rax,QWORD PTR[rcx] + mul rbx + add r9,rax + + mov QWORD PTR[8+rdi],r9 + + + sub rsi,192 + + mov r8,QWORD PTR[rcx] + mov r9,QWORD PTR[8+rcx] + + call MULADD_128x512 + + + + + mov rax,QWORD PTR[rsi] + mov rbx,QWORD PTR[8+rsi] + mov rdi,QWORD PTR[16+rsi] + mov rdx,QWORD PTR[24+rsi] + + + mov rbp,QWORD PTR[384+rsp] + + add r8,QWORD PTR[64+rcx] + adc r9,QWORD PTR[72+rcx] + + + adc rbp,rbp + + + + shl rbp,3 + mov rcx,QWORD PTR[32+rsp] + add rbp,rcx + + + xor rsi,rsi + + add r10,QWORD PTR[rbp] + adc r11,QWORD PTR[64+rbp] + adc r12,QWORD PTR[128+rbp] + adc r13,QWORD PTR[192+rbp] + adc r14,QWORD PTR[256+rbp] + adc r15,QWORD PTR[320+rbp] + adc r8,QWORD PTR[384+rbp] + adc r9,QWORD PTR[448+rbp] + + + + sbb rsi,0 + + + and rax,rsi + and rbx,rsi + and rdi,rsi + and rdx,rsi + + mov rbp,1 + sub r10,rax + sbb r11,rbx + sbb r12,rdi + sbb r13,rdx + + + + + sbb rbp,0 + + + + add rcx,512 + mov rax,QWORD PTR[32+rcx] + mov rbx,QWORD PTR[40+rcx] + mov rdi,QWORD PTR[48+rcx] + mov rdx,QWORD PTR[56+rcx] + + + + and rax,rsi + and rbx,rsi + and rdi,rsi + and rdx,rsi + + + + sub rbp,1 + + sbb r14,rax + sbb r15,rbx + sbb r8,rdi + sbb r9,rdx + + + + mov rsi,QWORD PTR[144+rsp] + mov QWORD PTR[rsi],r10 + mov QWORD PTR[8+rsi],r11 + mov QWORD PTR[16+rsi],r12 + mov QWORD PTR[24+rsi],r13 + mov QWORD PTR[32+rsi],r14 + mov QWORD PTR[40+rsi],r15 + mov QWORD PTR[48+rsi],r8 + mov QWORD PTR[56+rsi],r9 + + DB 0F3h,0C3h ;repret +mont_reduce ENDP + +ALIGN 16 +mont_mul_a3b PROC PRIVATE + + + + + mov rbp,QWORD PTR[rdi] + + mov rax,r10 + mul rbp + mov QWORD PTR[520+rsp],rax + mov r10,rdx + mov rax,r11 + mul rbp + add r10,rax + adc rdx,0 + mov r11,rdx + mov rax,r12 + mul rbp + add r11,rax + adc rdx,0 + mov r12,rdx + mov rax,r13 + mul rbp + add r12,rax + adc rdx,0 + mov r13,rdx + mov rax,r14 + mul rbp + add r13,rax + adc rdx,0 + mov r14,rdx + mov rax,r15 + mul rbp + add r14,rax + adc rdx,0 + mov r15,rdx + mov rax,r8 + mul rbp + add r15,rax + adc rdx,0 + mov r8,rdx + mov rax,r9 + mul rbp + add r8,rax + adc rdx,0 + mov r9,rdx + mov rbp,QWORD PTR[8+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r10,rax + adc rdx,0 + mov QWORD PTR[528+rsp],r10 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov r10,rdx + mov rbp,QWORD PTR[16+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r11,rax + adc rdx,0 + mov QWORD PTR[536+rsp],r11 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov r11,rdx + mov rbp,QWORD PTR[24+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r12,rax + adc rdx,0 + mov QWORD PTR[544+rsp],r12 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov r12,rdx + mov rbp,QWORD PTR[32+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r13,rax + adc rdx,0 + mov QWORD PTR[552+rsp],r13 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov r13,rdx + mov rbp,QWORD PTR[40+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r14,rax + adc rdx,0 + mov QWORD PTR[560+rsp],r14 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov r14,rdx + mov rbp,QWORD PTR[48+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r15,rax + adc rdx,0 + mov QWORD PTR[568+rsp],r15 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov r15,rdx + mov rbp,QWORD PTR[56+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r8,rax + adc rdx,0 + mov QWORD PTR[576+rsp],r8 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov r8,rdx + mov QWORD PTR[584+rsp],r9 + mov QWORD PTR[592+rsp],r10 + mov QWORD PTR[600+rsp],r11 + mov QWORD PTR[608+rsp],r12 + mov QWORD PTR[616+rsp],r13 + mov QWORD PTR[624+rsp],r14 + mov QWORD PTR[632+rsp],r15 + mov QWORD PTR[640+rsp],r8 + + + + + + jmp mont_reduce + + +mont_mul_a3b ENDP + +ALIGN 16 +sqr_reduce PROC PRIVATE + mov rcx,QWORD PTR[16+rsp] + + + + mov rbx,r10 + + mov rax,r11 + mul rbx + mov QWORD PTR[528+rsp],rax + mov r10,rdx + mov rax,r12 + mul rbx + add r10,rax + adc rdx,0 + mov r11,rdx + mov rax,r13 + mul rbx + add r11,rax + adc rdx,0 + mov r12,rdx + mov rax,r14 + mul rbx + add r12,rax + adc rdx,0 + mov r13,rdx + mov rax,r15 + mul rbx + add r13,rax + adc rdx,0 + mov r14,rdx + mov rax,r8 + mul rbx + add r14,rax + adc rdx,0 + mov r15,rdx + mov rax,r9 + mul rbx + add r15,rax + adc rdx,0 + mov rsi,rdx + + mov QWORD PTR[536+rsp],r10 + + + + + + mov rbx,QWORD PTR[8+rcx] + + mov rax,QWORD PTR[16+rcx] + mul rbx + add r11,rax + adc rdx,0 + mov QWORD PTR[544+rsp],r11 + + mov r10,rdx + mov rax,QWORD PTR[24+rcx] + mul rbx + add r12,rax + adc rdx,0 + add r12,r10 + adc rdx,0 + mov QWORD PTR[552+rsp],r12 + + mov r10,rdx + mov rax,QWORD PTR[32+rcx] + mul rbx + add r13,rax + adc rdx,0 + add r13,r10 + adc rdx,0 + + mov r10,rdx + mov rax,QWORD PTR[40+rcx] + mul rbx + add r14,rax + adc rdx,0 + add r14,r10 + adc rdx,0 + + mov r10,rdx + mov rax,r8 + mul rbx + add r15,rax + adc rdx,0 + add r15,r10 + adc rdx,0 + + mov r10,rdx + mov rax,r9 + mul rbx + add rsi,rax + adc rdx,0 + add rsi,r10 + adc rdx,0 + + mov r11,rdx + + + + + mov rbx,QWORD PTR[16+rcx] + + mov rax,QWORD PTR[24+rcx] + mul rbx + add r13,rax + adc rdx,0 + mov QWORD PTR[560+rsp],r13 + + mov r10,rdx + mov rax,QWORD PTR[32+rcx] + mul rbx + add r14,rax + adc rdx,0 + add r14,r10 + adc rdx,0 + mov QWORD PTR[568+rsp],r14 + + mov r10,rdx + mov rax,QWORD PTR[40+rcx] + mul rbx + add r15,rax + adc rdx,0 + add r15,r10 + adc rdx,0 + + mov r10,rdx + mov rax,r8 + mul rbx + add rsi,rax + adc rdx,0 + add rsi,r10 + adc rdx,0 + + mov r10,rdx + mov rax,r9 + mul rbx + add r11,rax + adc rdx,0 + add r11,r10 + adc rdx,0 + + mov r12,rdx + + + + + + mov rbx,QWORD PTR[24+rcx] + + mov rax,QWORD PTR[32+rcx] + mul rbx + add r15,rax + adc rdx,0 + mov QWORD PTR[576+rsp],r15 + + mov r10,rdx + mov rax,QWORD PTR[40+rcx] + mul rbx + add rsi,rax + adc rdx,0 + add rsi,r10 + adc rdx,0 + mov QWORD PTR[584+rsp],rsi + + mov r10,rdx + mov rax,r8 + mul rbx + add r11,rax + adc rdx,0 + add r11,r10 + adc rdx,0 + + mov r10,rdx + mov rax,r9 + mul rbx + add r12,rax + adc rdx,0 + add r12,r10 + adc rdx,0 + + mov r15,rdx + + + + + mov rbx,QWORD PTR[32+rcx] + + mov rax,QWORD PTR[40+rcx] + mul rbx + add r11,rax + adc rdx,0 + mov QWORD PTR[592+rsp],r11 + + mov r10,rdx + mov rax,r8 + mul rbx + add r12,rax + adc rdx,0 + add r12,r10 + adc rdx,0 + mov QWORD PTR[600+rsp],r12 + + mov r10,rdx + mov rax,r9 + mul rbx + add r15,rax + adc rdx,0 + add r15,r10 + adc rdx,0 + + mov r11,rdx + + + + + mov rbx,QWORD PTR[40+rcx] + + mov rax,r8 + mul rbx + add r15,rax + adc rdx,0 + mov QWORD PTR[608+rsp],r15 + + mov r10,rdx + mov rax,r9 + mul rbx + add r11,rax + adc rdx,0 + add r11,r10 + adc rdx,0 + mov QWORD PTR[616+rsp],r11 + + mov r12,rdx + + + + + mov rbx,r8 + + mov rax,r9 + mul rbx + add r12,rax + adc rdx,0 + mov QWORD PTR[624+rsp],r12 + + mov QWORD PTR[632+rsp],rdx + + + mov r10,QWORD PTR[528+rsp] + mov r11,QWORD PTR[536+rsp] + mov r12,QWORD PTR[544+rsp] + mov r13,QWORD PTR[552+rsp] + mov r14,QWORD PTR[560+rsp] + mov r15,QWORD PTR[568+rsp] + + mov rax,QWORD PTR[24+rcx] + mul rax + mov rdi,rax + mov r8,rdx + + add r10,r10 + adc r11,r11 + adc r12,r12 + adc r13,r13 + adc r14,r14 + adc r15,r15 + adc r8,0 + + mov rax,QWORD PTR[rcx] + mul rax + mov QWORD PTR[520+rsp],rax + mov rbx,rdx + + mov rax,QWORD PTR[8+rcx] + mul rax + + add r10,rbx + adc r11,rax + adc rdx,0 + + mov rbx,rdx + mov QWORD PTR[528+rsp],r10 + mov QWORD PTR[536+rsp],r11 + + mov rax,QWORD PTR[16+rcx] + mul rax + + add r12,rbx + adc r13,rax + adc rdx,0 + + mov rbx,rdx + + mov QWORD PTR[544+rsp],r12 + mov QWORD PTR[552+rsp],r13 + + xor rbp,rbp + add r14,rbx + adc r15,rdi + adc rbp,0 + + mov QWORD PTR[560+rsp],r14 + mov QWORD PTR[568+rsp],r15 + + + + + mov r10,QWORD PTR[576+rsp] + mov r11,QWORD PTR[584+rsp] + mov r12,QWORD PTR[592+rsp] + mov r13,QWORD PTR[600+rsp] + mov r14,QWORD PTR[608+rsp] + mov r15,QWORD PTR[616+rsp] + mov rdi,QWORD PTR[624+rsp] + mov rsi,QWORD PTR[632+rsp] + + mov rax,r9 + mul rax + mov r9,rax + mov rbx,rdx + + add r10,r10 + adc r11,r11 + adc r12,r12 + adc r13,r13 + adc r14,r14 + adc r15,r15 + adc rdi,rdi + adc rsi,rsi + adc rbx,0 + + add r10,rbp + + mov rax,QWORD PTR[32+rcx] + mul rax + + add r10,r8 + adc r11,rax + adc rdx,0 + + mov rbp,rdx + + mov QWORD PTR[576+rsp],r10 + mov QWORD PTR[584+rsp],r11 + + mov rax,QWORD PTR[40+rcx] + mul rax + + add r12,rbp + adc r13,rax + adc rdx,0 + + mov rbp,rdx + + mov QWORD PTR[592+rsp],r12 + mov QWORD PTR[600+rsp],r13 + + mov rax,QWORD PTR[48+rcx] + mul rax + + add r14,rbp + adc r15,rax + adc rdx,0 + + mov QWORD PTR[608+rsp],r14 + mov QWORD PTR[616+rsp],r15 + + add rdi,rdx + adc rsi,r9 + adc rbx,0 + + mov QWORD PTR[624+rsp],rdi + mov QWORD PTR[632+rsp],rsi + mov QWORD PTR[640+rsp],rbx + + jmp mont_reduce + + +sqr_reduce ENDP +PUBLIC mod_exp_512 + +mod_exp_512 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_mod_exp_512:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + + + mov r8,rsp + sub rsp,2688 + and rsp,-64 + + + mov QWORD PTR[rsp],r8 + mov QWORD PTR[8+rsp],rdi + mov QWORD PTR[16+rsp],rsi + mov QWORD PTR[24+rsp],rcx +$L$body:: + + + + pxor xmm4,xmm4 + movdqu xmm0,XMMWORD PTR[rsi] + movdqu xmm1,XMMWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[32+rsi] + movdqu xmm3,XMMWORD PTR[48+rsi] + movdqa XMMWORD PTR[512+rsp],xmm4 + movdqa XMMWORD PTR[528+rsp],xmm4 + movdqa XMMWORD PTR[608+rsp],xmm4 + movdqa XMMWORD PTR[624+rsp],xmm4 + movdqa XMMWORD PTR[544+rsp],xmm0 + movdqa XMMWORD PTR[560+rsp],xmm1 + movdqa XMMWORD PTR[576+rsp],xmm2 + movdqa XMMWORD PTR[592+rsp],xmm3 + + + movdqu xmm0,XMMWORD PTR[rdx] + movdqu xmm1,XMMWORD PTR[16+rdx] + movdqu xmm2,XMMWORD PTR[32+rdx] + movdqu xmm3,XMMWORD PTR[48+rdx] + + lea rbx,QWORD PTR[384+rsp] + mov QWORD PTR[136+rsp],rbx + call mont_reduce + + + lea rcx,QWORD PTR[448+rsp] + xor rax,rax + mov QWORD PTR[rcx],rax + mov QWORD PTR[8+rcx],rax + mov QWORD PTR[24+rcx],rax + mov QWORD PTR[32+rcx],rax + mov QWORD PTR[40+rcx],rax + mov QWORD PTR[48+rcx],rax + mov QWORD PTR[56+rcx],rax + mov QWORD PTR[128+rsp],rax + mov QWORD PTR[16+rcx],1 + + lea rbp,QWORD PTR[640+rsp] + mov rsi,rcx + mov rdi,rbp + mov rax,8 +loop_0:: + mov rbx,QWORD PTR[rcx] + mov WORD PTR[rdi],bx + shr rbx,16 + mov WORD PTR[64+rdi],bx + shr rbx,16 + mov WORD PTR[128+rdi],bx + shr rbx,16 + mov WORD PTR[192+rdi],bx + lea rcx,QWORD PTR[8+rcx] + lea rdi,QWORD PTR[256+rdi] + dec rax + jnz loop_0 + mov rax,31 + mov QWORD PTR[32+rsp],rax + mov QWORD PTR[40+rsp],rbp + + mov QWORD PTR[136+rsp],rsi + mov r10,QWORD PTR[rsi] + mov r11,QWORD PTR[8+rsi] + mov r12,QWORD PTR[16+rsi] + mov r13,QWORD PTR[24+rsi] + mov r14,QWORD PTR[32+rsi] + mov r15,QWORD PTR[40+rsi] + mov r8,QWORD PTR[48+rsi] + mov r9,QWORD PTR[56+rsi] +init_loop:: + lea rdi,QWORD PTR[384+rsp] + call mont_mul_a3b + lea rsi,QWORD PTR[448+rsp] + mov rbp,QWORD PTR[40+rsp] + add rbp,2 + mov QWORD PTR[40+rsp],rbp + mov rcx,rsi + mov rax,8 +loop_1:: + mov rbx,QWORD PTR[rcx] + mov WORD PTR[rbp],bx + shr rbx,16 + mov WORD PTR[64+rbp],bx + shr rbx,16 + mov WORD PTR[128+rbp],bx + shr rbx,16 + mov WORD PTR[192+rbp],bx + lea rcx,QWORD PTR[8+rcx] + lea rbp,QWORD PTR[256+rbp] + dec rax + jnz loop_1 + mov rax,QWORD PTR[32+rsp] + sub rax,1 + mov QWORD PTR[32+rsp],rax + jne init_loop + + + + movdqa XMMWORD PTR[64+rsp],xmm0 + movdqa XMMWORD PTR[80+rsp],xmm1 + movdqa XMMWORD PTR[96+rsp],xmm2 + movdqa XMMWORD PTR[112+rsp],xmm3 + + + + + + mov eax,DWORD PTR[126+rsp] + mov rdx,rax + shr rax,11 + and edx,007FFh + mov DWORD PTR[126+rsp],edx + lea rsi,QWORD PTR[640+rax*2+rsp] + mov rdx,QWORD PTR[8+rsp] + mov rbp,4 +loop_2:: + movzx rbx,WORD PTR[192+rsi] + movzx rax,WORD PTR[448+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[128+rsi] + mov ax,WORD PTR[384+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[64+rsi] + mov ax,WORD PTR[320+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[rsi] + mov ax,WORD PTR[256+rsi] + mov QWORD PTR[rdx],rbx + mov QWORD PTR[8+rdx],rax + lea rsi,QWORD PTR[512+rsi] + lea rdx,QWORD PTR[16+rdx] + sub rbp,1 + jnz loop_2 + mov QWORD PTR[48+rsp],505 + + mov rcx,QWORD PTR[8+rsp] + mov QWORD PTR[136+rsp],rcx + mov r10,QWORD PTR[rcx] + mov r11,QWORD PTR[8+rcx] + mov r12,QWORD PTR[16+rcx] + mov r13,QWORD PTR[24+rcx] + mov r14,QWORD PTR[32+rcx] + mov r15,QWORD PTR[40+rcx] + mov r8,QWORD PTR[48+rcx] + mov r9,QWORD PTR[56+rcx] + jmp sqr_2 + +main_loop_a3b:: + call sqr_reduce + call sqr_reduce + call sqr_reduce +sqr_2:: + call sqr_reduce + call sqr_reduce + + + + mov rcx,QWORD PTR[48+rsp] + mov rax,rcx + shr rax,4 + mov edx,DWORD PTR[64+rax*2+rsp] + and rcx,15 + shr rdx,cl + and rdx,01Fh + + lea rsi,QWORD PTR[640+rdx*2+rsp] + lea rdx,QWORD PTR[448+rsp] + mov rdi,rdx + mov rbp,4 +loop_3:: + movzx rbx,WORD PTR[192+rsi] + movzx rax,WORD PTR[448+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[128+rsi] + mov ax,WORD PTR[384+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[64+rsi] + mov ax,WORD PTR[320+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[rsi] + mov ax,WORD PTR[256+rsi] + mov QWORD PTR[rdx],rbx + mov QWORD PTR[8+rdx],rax + lea rsi,QWORD PTR[512+rsi] + lea rdx,QWORD PTR[16+rdx] + sub rbp,1 + jnz loop_3 + mov rsi,QWORD PTR[8+rsp] + call mont_mul_a3b + + + + mov rcx,QWORD PTR[48+rsp] + sub rcx,5 + mov QWORD PTR[48+rsp],rcx + jge main_loop_a3b + + + +end_main_loop_a3b:: + + + mov rdx,QWORD PTR[8+rsp] + pxor xmm4,xmm4 + movdqu xmm0,XMMWORD PTR[rdx] + movdqu xmm1,XMMWORD PTR[16+rdx] + movdqu xmm2,XMMWORD PTR[32+rdx] + movdqu xmm3,XMMWORD PTR[48+rdx] + movdqa XMMWORD PTR[576+rsp],xmm4 + movdqa XMMWORD PTR[592+rsp],xmm4 + movdqa XMMWORD PTR[608+rsp],xmm4 + movdqa XMMWORD PTR[624+rsp],xmm4 + movdqa XMMWORD PTR[512+rsp],xmm0 + movdqa XMMWORD PTR[528+rsp],xmm1 + movdqa XMMWORD PTR[544+rsp],xmm2 + movdqa XMMWORD PTR[560+rsp],xmm3 + call mont_reduce + + + + mov rax,QWORD PTR[8+rsp] + mov r8,QWORD PTR[rax] + mov r9,QWORD PTR[8+rax] + mov r10,QWORD PTR[16+rax] + mov r11,QWORD PTR[24+rax] + mov r12,QWORD PTR[32+rax] + mov r13,QWORD PTR[40+rax] + mov r14,QWORD PTR[48+rax] + mov r15,QWORD PTR[56+rax] + + + mov rbx,QWORD PTR[24+rsp] + add rbx,512 + + sub r8,QWORD PTR[rbx] + sbb r9,QWORD PTR[8+rbx] + sbb r10,QWORD PTR[16+rbx] + sbb r11,QWORD PTR[24+rbx] + sbb r12,QWORD PTR[32+rbx] + sbb r13,QWORD PTR[40+rbx] + sbb r14,QWORD PTR[48+rbx] + sbb r15,QWORD PTR[56+rbx] + + + mov rsi,QWORD PTR[rax] + mov rdi,QWORD PTR[8+rax] + mov rcx,QWORD PTR[16+rax] + mov rdx,QWORD PTR[24+rax] + cmovnc rsi,r8 + cmovnc rdi,r9 + cmovnc rcx,r10 + cmovnc rdx,r11 + mov QWORD PTR[rax],rsi + mov QWORD PTR[8+rax],rdi + mov QWORD PTR[16+rax],rcx + mov QWORD PTR[24+rax],rdx + + mov rsi,QWORD PTR[32+rax] + mov rdi,QWORD PTR[40+rax] + mov rcx,QWORD PTR[48+rax] + mov rdx,QWORD PTR[56+rax] + cmovnc rsi,r12 + cmovnc rdi,r13 + cmovnc rcx,r14 + cmovnc rdx,r15 + mov QWORD PTR[32+rax],rsi + mov QWORD PTR[40+rax],rdi + mov QWORD PTR[48+rax],rcx + mov QWORD PTR[56+rax],rdx + + mov rsi,QWORD PTR[rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbx,QWORD PTR[32+rsi] + mov rbp,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_mod_exp_512:: +mod_exp_512 ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +mod_exp_512_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$body] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jae $L$in_prologue + + mov rax,QWORD PTR[rax] + + mov rbx,QWORD PTR[32+rax] + mov rbp,QWORD PTR[40+rax] + mov r12,QWORD PTR[24+rax] + mov r13,QWORD PTR[16+rax] + mov r14,QWORD PTR[8+rax] + mov r15,QWORD PTR[rax] + lea rax,QWORD PTR[48+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +mod_exp_512_se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_mod_exp_512 + DD imagerel $L$SEH_end_mod_exp_512 + DD imagerel $L$SEH_info_mod_exp_512 + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_mod_exp_512:: +DB 9,0,0,0 + DD imagerel mod_exp_512_se_handler + +.xdata ENDS +END diff --git a/win-x86_64/crypto/bn/rsaz-avx2.asm b/win-x86_64/crypto/bn/rsaz-avx2.asm new file mode 100644 index 0000000..f9188f5 --- /dev/null +++ b/win-x86_64/crypto/bn/rsaz-avx2.asm @@ -0,0 +1,29 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC rsaz_avx2_eligible + +rsaz_avx2_eligible PROC PUBLIC + xor eax,eax + DB 0F3h,0C3h ;repret +rsaz_avx2_eligible ENDP + +PUBLIC rsaz_1024_sqr_avx2 +PUBLIC rsaz_1024_mul_avx2 +PUBLIC rsaz_1024_norm2red_avx2 +PUBLIC rsaz_1024_red2norm_avx2 +PUBLIC rsaz_1024_scatter5_avx2 +PUBLIC rsaz_1024_gather5_avx2 + +rsaz_1024_sqr_avx2 PROC PUBLIC +rsaz_1024_mul_avx2:: +rsaz_1024_norm2red_avx2:: +rsaz_1024_red2norm_avx2:: +rsaz_1024_scatter5_avx2:: +rsaz_1024_gather5_avx2:: +DB 00fh,00bh + DB 0F3h,0C3h ;repret +rsaz_1024_sqr_avx2 ENDP + +.text$ ENDS +END diff --git a/win-x86_64/crypto/bn/rsaz-x86_64.asm b/win-x86_64/crypto/bn/rsaz-x86_64.asm new file mode 100644 index 0000000..86e828d --- /dev/null +++ b/win-x86_64/crypto/bn/rsaz-x86_64.asm @@ -0,0 +1,1326 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC rsaz_512_sqr + +ALIGN 32 +rsaz_512_sqr PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_sqr:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + sub rsp,128+24 +$L$sqr_body:: + mov rbp,rdx + mov rdx,QWORD PTR[rsi] + mov rax,QWORD PTR[8+rsi] + mov QWORD PTR[128+rsp],rcx + jmp $L$oop_sqr + +ALIGN 32 +$L$oop_sqr:: + mov DWORD PTR[((128+8))+rsp],r8d + + mov rbx,rdx + mul rdx + mov r8,rax + mov rax,QWORD PTR[16+rsi] + mov r9,rdx + + mul rbx + add r9,rax + mov rax,QWORD PTR[24+rsi] + mov r10,rdx + adc r10,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[32+rsi] + mov r11,rdx + adc r11,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[40+rsi] + mov r12,rdx + adc r12,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[48+rsi] + mov r13,rdx + adc r13,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[56+rsi] + mov r14,rdx + adc r14,0 + + mul rbx + add r14,rax + mov rax,rbx + mov r15,rdx + adc r15,0 + + add r8,r8 + mov rcx,r9 + adc r9,r9 + + mul rax + mov QWORD PTR[rsp],rax + add r8,rdx + adc r9,0 + + mov QWORD PTR[8+rsp],r8 + shr rcx,63 + + + mov r8,QWORD PTR[8+rsi] + mov rax,QWORD PTR[16+rsi] + mul r8 + add r10,rax + mov rax,QWORD PTR[24+rsi] + mov rbx,rdx + adc rbx,0 + + mul r8 + add r11,rax + mov rax,QWORD PTR[32+rsi] + adc rdx,0 + add r11,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r12,rax + mov rax,QWORD PTR[40+rsi] + adc rdx,0 + add r12,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r13,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r13,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r14,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r14,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r15,rax + mov rax,r8 + adc rdx,0 + add r15,rbx + mov r8,rdx + mov rdx,r10 + adc r8,0 + + add rdx,rdx + lea r10,QWORD PTR[r10*2+rcx] + mov rbx,r11 + adc r11,r11 + + mul rax + add r9,rax + adc r10,rdx + adc r11,0 + + mov QWORD PTR[16+rsp],r9 + mov QWORD PTR[24+rsp],r10 + shr rbx,63 + + + mov r9,QWORD PTR[16+rsi] + mov rax,QWORD PTR[24+rsi] + mul r9 + add r12,rax + mov rax,QWORD PTR[32+rsi] + mov rcx,rdx + adc rcx,0 + + mul r9 + add r13,rax + mov rax,QWORD PTR[40+rsi] + adc rdx,0 + add r13,rcx + mov rcx,rdx + adc rcx,0 + + mul r9 + add r14,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r14,rcx + mov rcx,rdx + adc rcx,0 + + mul r9 + mov r10,r12 + lea r12,QWORD PTR[r12*2+rbx] + add r15,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r15,rcx + mov rcx,rdx + adc rcx,0 + + mul r9 + shr r10,63 + add r8,rax + mov rax,r9 + adc rdx,0 + add r8,rcx + mov r9,rdx + adc r9,0 + + mov rcx,r13 + lea r13,QWORD PTR[r13*2+r10] + + mul rax + add r11,rax + adc r12,rdx + adc r13,0 + + mov QWORD PTR[32+rsp],r11 + mov QWORD PTR[40+rsp],r12 + shr rcx,63 + + + mov r10,QWORD PTR[24+rsi] + mov rax,QWORD PTR[32+rsi] + mul r10 + add r14,rax + mov rax,QWORD PTR[40+rsi] + mov rbx,rdx + adc rbx,0 + + mul r10 + add r15,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r15,rbx + mov rbx,rdx + adc rbx,0 + + mul r10 + mov r12,r14 + lea r14,QWORD PTR[r14*2+rcx] + add r8,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r8,rbx + mov rbx,rdx + adc rbx,0 + + mul r10 + shr r12,63 + add r9,rax + mov rax,r10 + adc rdx,0 + add r9,rbx + mov r10,rdx + adc r10,0 + + mov rbx,r15 + lea r15,QWORD PTR[r15*2+r12] + + mul rax + add r13,rax + adc r14,rdx + adc r15,0 + + mov QWORD PTR[48+rsp],r13 + mov QWORD PTR[56+rsp],r14 + shr rbx,63 + + + mov r11,QWORD PTR[32+rsi] + mov rax,QWORD PTR[40+rsi] + mul r11 + add r8,rax + mov rax,QWORD PTR[48+rsi] + mov rcx,rdx + adc rcx,0 + + mul r11 + add r9,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + mov r12,r8 + lea r8,QWORD PTR[r8*2+rbx] + add r9,rcx + mov rcx,rdx + adc rcx,0 + + mul r11 + shr r12,63 + add r10,rax + mov rax,r11 + adc rdx,0 + add r10,rcx + mov r11,rdx + adc r11,0 + + mov rcx,r9 + lea r9,QWORD PTR[r9*2+r12] + + mul rax + add r15,rax + adc r8,rdx + adc r9,0 + + mov QWORD PTR[64+rsp],r15 + mov QWORD PTR[72+rsp],r8 + shr rcx,63 + + + mov r12,QWORD PTR[40+rsi] + mov rax,QWORD PTR[48+rsi] + mul r12 + add r10,rax + mov rax,QWORD PTR[56+rsi] + mov rbx,rdx + adc rbx,0 + + mul r12 + add r11,rax + mov rax,r12 + mov r15,r10 + lea r10,QWORD PTR[r10*2+rcx] + adc rdx,0 + shr r15,63 + add r11,rbx + mov r12,rdx + adc r12,0 + + mov rbx,r11 + lea r11,QWORD PTR[r11*2+r15] + + mul rax + add r9,rax + adc r10,rdx + adc r11,0 + + mov QWORD PTR[80+rsp],r9 + mov QWORD PTR[88+rsp],r10 + + + mov r13,QWORD PTR[48+rsi] + mov rax,QWORD PTR[56+rsi] + mul r13 + add r12,rax + mov rax,r13 + mov r13,rdx + adc r13,0 + + xor r14,r14 + shl rbx,1 + adc r12,r12 + adc r13,r13 + adc r14,r14 + + mul rax + add r11,rax + adc r12,rdx + adc r13,0 + + mov QWORD PTR[96+rsp],r11 + mov QWORD PTR[104+rsp],r12 + + + mov rax,QWORD PTR[56+rsi] + mul rax + add r13,rax + adc rdx,0 + + add r14,rdx + + mov QWORD PTR[112+rsp],r13 + mov QWORD PTR[120+rsp],r14 + + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reduce + + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] + sbb rcx,rcx + + call __rsaz_512_subtract + + mov rdx,r8 + mov rax,r9 + mov r8d,DWORD PTR[((128+8))+rsp] + mov rsi,rdi + + dec r8d + jnz $L$oop_sqr + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$sqr_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_sqr:: +rsaz_512_sqr ENDP +PUBLIC rsaz_512_mul + +ALIGN 32 +rsaz_512_mul PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + sub rsp,128+24 +$L$mul_body:: +DB 102,72,15,110,199 +DB 102,72,15,110,201 + mov QWORD PTR[128+rsp],r8 + mov rbx,QWORD PTR[rdx] + mov rbp,rdx + call __rsaz_512_mul + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reduce + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] + sbb rcx,rcx + + call __rsaz_512_subtract + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$mul_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul:: +rsaz_512_mul ENDP +PUBLIC rsaz_512_mul_gather4 + +ALIGN 32 +rsaz_512_mul_gather4 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul_gather4:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + sub rsp,128+24 +$L$mul_gather4_body:: + mov eax,DWORD PTR[64+r9*4+rdx] +DB 102,72,15,110,199 + mov ebx,DWORD PTR[r9*4+rdx] +DB 102,72,15,110,201 + mov QWORD PTR[128+rsp],r8 + + shl rax,32 + or rbx,rax + mov rax,QWORD PTR[rsi] + mov rcx,QWORD PTR[8+rsi] + lea rbp,QWORD PTR[128+r9*4+rdx] + mul rbx + mov QWORD PTR[rsp],rax + mov rax,rcx + mov r8,rdx + + mul rbx + movd xmm4,DWORD PTR[rbp] + add r8,rax + mov rax,QWORD PTR[16+rsi] + mov r9,rdx + adc r9,0 + + mul rbx + movd xmm5,DWORD PTR[64+rbp] + add r9,rax + mov rax,QWORD PTR[24+rsi] + mov r10,rdx + adc r10,0 + + mul rbx + pslldq xmm5,4 + add r10,rax + mov rax,QWORD PTR[32+rsi] + mov r11,rdx + adc r11,0 + + mul rbx + por xmm4,xmm5 + add r11,rax + mov rax,QWORD PTR[40+rsi] + mov r12,rdx + adc r12,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[48+rsi] + mov r13,rdx + adc r13,0 + + mul rbx + lea rbp,QWORD PTR[128+rbp] + add r13,rax + mov rax,QWORD PTR[56+rsi] + mov r14,rdx + adc r14,0 + + mul rbx +DB 102,72,15,126,227 + add r14,rax + mov rax,QWORD PTR[rsi] + mov r15,rdx + adc r15,0 + + lea rdi,QWORD PTR[8+rsp] + mov ecx,7 + jmp $L$oop_mul_gather + +ALIGN 32 +$L$oop_mul_gather:: + mul rbx + add r8,rax + mov rax,QWORD PTR[8+rsi] + mov QWORD PTR[rdi],r8 + mov r8,rdx + adc r8,0 + + mul rbx + movd xmm4,DWORD PTR[rbp] + add r9,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + add r8,r9 + mov r9,rdx + adc r9,0 + + mul rbx + movd xmm5,DWORD PTR[64+rbp] + add r10,rax + mov rax,QWORD PTR[24+rsi] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + pslldq xmm5,4 + add r11,rax + mov rax,QWORD PTR[32+rsi] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + por xmm4,xmm5 + add r12,rax + mov rax,QWORD PTR[40+rsi] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx +DB 102,72,15,126,227 + add r15,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + lea rbp,QWORD PTR[128+rbp] + lea rdi,QWORD PTR[8+rdi] + + dec ecx + jnz $L$oop_mul_gather + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reduce + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] + sbb rcx,rcx + + call __rsaz_512_subtract + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$mul_gather4_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul_gather4:: +rsaz_512_mul_gather4 ENDP +PUBLIC rsaz_512_mul_scatter4 + +ALIGN 32 +rsaz_512_mul_scatter4 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul_scatter4:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + sub rsp,128+24 +$L$mul_scatter4_body:: + lea r8,QWORD PTR[r9*4+r8] +DB 102,72,15,110,199 +DB 102,72,15,110,202 +DB 102,73,15,110,208 + mov QWORD PTR[128+rsp],rcx + + mov rbp,rdi + mov rbx,QWORD PTR[rdi] + call __rsaz_512_mul + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reduce + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] +DB 102,72,15,126,214 + sbb rcx,rcx + + call __rsaz_512_subtract + + mov DWORD PTR[rsi],r8d + shr r8,32 + mov DWORD PTR[128+rsi],r9d + shr r9,32 + mov DWORD PTR[256+rsi],r10d + shr r10,32 + mov DWORD PTR[384+rsi],r11d + shr r11,32 + mov DWORD PTR[512+rsi],r12d + shr r12,32 + mov DWORD PTR[640+rsi],r13d + shr r13,32 + mov DWORD PTR[768+rsi],r14d + shr r14,32 + mov DWORD PTR[896+rsi],r15d + shr r15,32 + mov DWORD PTR[64+rsi],r8d + mov DWORD PTR[192+rsi],r9d + mov DWORD PTR[320+rsi],r10d + mov DWORD PTR[448+rsi],r11d + mov DWORD PTR[576+rsi],r12d + mov DWORD PTR[704+rsi],r13d + mov DWORD PTR[832+rsi],r14d + mov DWORD PTR[960+rsi],r15d + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$mul_scatter4_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul_scatter4:: +rsaz_512_mul_scatter4 ENDP +PUBLIC rsaz_512_mul_by_one + +ALIGN 32 +rsaz_512_mul_by_one PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul_by_one:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + sub rsp,128+24 +$L$mul_by_one_body:: + mov rbp,rdx + mov QWORD PTR[128+rsp],rcx + + mov r8,QWORD PTR[rsi] + pxor xmm0,xmm0 + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + mov r14,QWORD PTR[48+rsi] + mov r15,QWORD PTR[56+rsi] + + movdqa XMMWORD PTR[rsp],xmm0 + movdqa XMMWORD PTR[16+rsp],xmm0 + movdqa XMMWORD PTR[32+rsp],xmm0 + movdqa XMMWORD PTR[48+rsp],xmm0 + movdqa XMMWORD PTR[64+rsp],xmm0 + movdqa XMMWORD PTR[80+rsp],xmm0 + movdqa XMMWORD PTR[96+rsp],xmm0 + call __rsaz_512_reduce + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$mul_by_one_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul_by_one:: +rsaz_512_mul_by_one ENDP + +ALIGN 32 +__rsaz_512_reduce PROC PRIVATE + mov rbx,r8 + imul rbx,QWORD PTR[((128+8))+rsp] + mov rax,QWORD PTR[rbp] + mov ecx,8 + jmp $L$reduction_loop + +ALIGN 32 +$L$reduction_loop:: + mul rbx + mov rax,QWORD PTR[8+rbp] + neg r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[16+rbp] + adc rdx,0 + add r8,r9 + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[24+rbp] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[32+rbp] + adc rdx,0 + add r10,r11 + mov rsi,QWORD PTR[((128+8))+rsp] + + + adc rdx,0 + mov r11,rdx + + mul rbx + add r12,rax + mov rax,QWORD PTR[40+rbp] + adc rdx,0 + imul rsi,r8 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[48+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[56+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,rsi + add r15,rax + mov rax,QWORD PTR[rbp] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + dec ecx + jne $L$reduction_loop + + DB 0F3h,0C3h ;repret +__rsaz_512_reduce ENDP + +ALIGN 32 +__rsaz_512_subtract PROC PRIVATE + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + mov r8,QWORD PTR[rbp] + mov r9,QWORD PTR[8+rbp] + neg r8 + not r9 + and r8,rcx + mov r10,QWORD PTR[16+rbp] + and r9,rcx + not r10 + mov r11,QWORD PTR[24+rbp] + and r10,rcx + not r11 + mov r12,QWORD PTR[32+rbp] + and r11,rcx + not r12 + mov r13,QWORD PTR[40+rbp] + and r12,rcx + not r13 + mov r14,QWORD PTR[48+rbp] + and r13,rcx + not r14 + mov r15,QWORD PTR[56+rbp] + and r14,rcx + not r15 + and r15,rcx + + add r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + DB 0F3h,0C3h ;repret +__rsaz_512_subtract ENDP + +ALIGN 32 +__rsaz_512_mul PROC PRIVATE + lea rdi,QWORD PTR[8+rsp] + + mov rax,QWORD PTR[rsi] + mul rbx + mov QWORD PTR[rdi],rax + mov rax,QWORD PTR[8+rsi] + mov r8,rdx + + mul rbx + add r8,rax + mov rax,QWORD PTR[16+rsi] + mov r9,rdx + adc r9,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[24+rsi] + mov r10,rdx + adc r10,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[32+rsi] + mov r11,rdx + adc r11,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[40+rsi] + mov r12,rdx + adc r12,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[48+rsi] + mov r13,rdx + adc r13,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[56+rsi] + mov r14,rdx + adc r14,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[rsi] + mov r15,rdx + adc r15,0 + + lea rbp,QWORD PTR[8+rbp] + lea rdi,QWORD PTR[8+rdi] + + mov ecx,7 + jmp $L$oop_mul + +ALIGN 32 +$L$oop_mul:: + mov rbx,QWORD PTR[rbp] + mul rbx + add r8,rax + mov rax,QWORD PTR[8+rsi] + mov QWORD PTR[rdi],r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + add r8,r9 + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[24+rsi] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[32+rsi] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[40+rsi] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r13,r14 + mov r14,rdx + lea rbp,QWORD PTR[8+rbp] + adc r14,0 + + mul rbx + add r15,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + lea rdi,QWORD PTR[8+rdi] + + dec ecx + jnz $L$oop_mul + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + DB 0F3h,0C3h ;repret +__rsaz_512_mul ENDP +PUBLIC rsaz_512_scatter4 + +ALIGN 16 +rsaz_512_scatter4 PROC PUBLIC + lea rcx,QWORD PTR[r8*4+rcx] + mov r9d,8 + jmp $L$oop_scatter +ALIGN 16 +$L$oop_scatter:: + mov rax,QWORD PTR[rdx] + lea rdx,QWORD PTR[8+rdx] + mov DWORD PTR[rcx],eax + shr rax,32 + mov DWORD PTR[64+rcx],eax + lea rcx,QWORD PTR[128+rcx] + dec r9d + jnz $L$oop_scatter + DB 0F3h,0C3h ;repret +rsaz_512_scatter4 ENDP + +PUBLIC rsaz_512_gather4 + +ALIGN 16 +rsaz_512_gather4 PROC PUBLIC + lea rdx,QWORD PTR[r8*4+rdx] + mov r9d,8 + jmp $L$oop_gather +ALIGN 16 +$L$oop_gather:: + mov eax,DWORD PTR[rdx] + mov r8d,DWORD PTR[64+rdx] + lea rdx,QWORD PTR[128+rdx] + shl r8,32 + or rax,r8 + mov QWORD PTR[rcx],rax + lea rcx,QWORD PTR[8+rcx] + dec r9d + jnz $L$oop_gather + DB 0F3h,0C3h ;repret +rsaz_512_gather4 ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rax,QWORD PTR[((128+24+48))+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_rsaz_512_sqr + DD imagerel $L$SEH_end_rsaz_512_sqr + DD imagerel $L$SEH_info_rsaz_512_sqr + + DD imagerel $L$SEH_begin_rsaz_512_mul + DD imagerel $L$SEH_end_rsaz_512_mul + DD imagerel $L$SEH_info_rsaz_512_mul + + DD imagerel $L$SEH_begin_rsaz_512_mul_gather4 + DD imagerel $L$SEH_end_rsaz_512_mul_gather4 + DD imagerel $L$SEH_info_rsaz_512_mul_gather4 + + DD imagerel $L$SEH_begin_rsaz_512_mul_scatter4 + DD imagerel $L$SEH_end_rsaz_512_mul_scatter4 + DD imagerel $L$SEH_info_rsaz_512_mul_scatter4 + + DD imagerel $L$SEH_begin_rsaz_512_mul_by_one + DD imagerel $L$SEH_end_rsaz_512_mul_by_one + DD imagerel $L$SEH_info_rsaz_512_mul_by_one + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_rsaz_512_sqr:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$sqr_body,imagerel $L$sqr_epilogue +$L$SEH_info_rsaz_512_mul:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$mul_body,imagerel $L$mul_epilogue +$L$SEH_info_rsaz_512_mul_gather4:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$mul_gather4_body,imagerel $L$mul_gather4_epilogue +$L$SEH_info_rsaz_512_mul_scatter4:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$mul_scatter4_body,imagerel $L$mul_scatter4_epilogue +$L$SEH_info_rsaz_512_mul_by_one:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$mul_by_one_body,imagerel $L$mul_by_one_epilogue + +.xdata ENDS +END diff --git a/win-x86_64/crypto/bn/x86_64-mont.asm b/win-x86_64/crypto/bn/x86_64-mont.asm new file mode 100644 index 0000000..a409325 --- /dev/null +++ b/win-x86_64/crypto/bn/x86_64-mont.asm @@ -0,0 +1,945 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC bn_mul_mont + +ALIGN 16 +bn_mul_mont PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul_mont:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + test r9d,3 + jnz $L$mul_enter + cmp r9d,8 + jb $L$mul_enter + cmp rdx,rsi + jne $L$mul4x_enter + test r9d,7 + jz $L$sqr8x_enter + jmp $L$mul4x_enter + +ALIGN 16 +$L$mul_enter:: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + lea r10,QWORD PTR[2+r9] + mov r11,rsp + neg r10 + lea rsp,QWORD PTR[r10*8+rsp] + and rsp,-1024 + + mov QWORD PTR[8+r9*8+rsp],r11 +$L$mul_body:: + mov r12,rdx + mov r8,QWORD PTR[r8] + mov rbx,QWORD PTR[r12] + mov rax,QWORD PTR[rsi] + + xor r14,r14 + xor r15,r15 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD PTR[rcx] + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov r13,rdx + + lea r15,QWORD PTR[1+r15] + jmp $L$1st_enter + +ALIGN 16 +$L$1st:: + add r13,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add r13,r11 + mov r11,r10 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$1st_enter:: + mul rbx + add r11,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + lea r15,QWORD PTR[1+r15] + mov r10,rdx + + mul rbp + cmp r15,r9 + jne $L$1st + + add r13,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r13,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + mov r11,r10 + + xor rdx,rdx + add r13,r11 + adc rdx,0 + mov QWORD PTR[((-8))+r9*8+rsp],r13 + mov QWORD PTR[r9*8+rsp],rdx + + lea r14,QWORD PTR[1+r14] + jmp $L$outer +ALIGN 16 +$L$outer:: + mov rbx,QWORD PTR[r14*8+r12] + xor r15,r15 + mov rbp,r8 + mov r10,QWORD PTR[rsp] + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov r10,QWORD PTR[8+rsp] + mov r13,rdx + + lea r15,QWORD PTR[1+r15] + jmp $L$inner_enter + +ALIGN 16 +$L$inner:: + add r13,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$inner_enter:: + mul rbx + add r11,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + lea r15,QWORD PTR[1+r15] + + mul rbp + cmp r15,r9 + jne $L$inner + + add r13,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + + xor rdx,rdx + add r13,r11 + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r9*8+rsp],r13 + mov QWORD PTR[r9*8+rsp],rdx + + lea r14,QWORD PTR[1+r14] + cmp r14,r9 + jb $L$outer + + xor r14,r14 + mov rax,QWORD PTR[rsp] + lea rsi,QWORD PTR[rsp] + mov r15,r9 + jmp $L$sub +ALIGN 16 +$L$sub:: sbb rax,QWORD PTR[r14*8+rcx] + mov QWORD PTR[r14*8+rdi],rax + mov rax,QWORD PTR[8+r14*8+rsi] + lea r14,QWORD PTR[1+r14] + dec r15 + jnz $L$sub + + sbb rax,0 + xor r14,r14 + mov r15,r9 +ALIGN 16 +$L$copy:: + mov rsi,QWORD PTR[r14*8+rsp] + mov rcx,QWORD PTR[r14*8+rdi] + xor rsi,rcx + and rsi,rax + xor rsi,rcx + mov QWORD PTR[r14*8+rsp],r14 + mov QWORD PTR[r14*8+rdi],rsi + lea r14,QWORD PTR[1+r14] + sub r15,1 + jnz $L$copy + + mov rsi,QWORD PTR[8+r9*8+rsp] + mov rax,1 + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$mul_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul_mont:: +bn_mul_mont ENDP + +ALIGN 16 +bn_mul4x_mont PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul4x_mont:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +$L$mul4x_enter:: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + lea r10,QWORD PTR[4+r9] + mov r11,rsp + neg r10 + lea rsp,QWORD PTR[r10*8+rsp] + and rsp,-1024 + + mov QWORD PTR[8+r9*8+rsp],r11 +$L$mul4x_body:: + mov QWORD PTR[16+r9*8+rsp],rdi + mov r12,rdx + mov r8,QWORD PTR[r8] + mov rbx,QWORD PTR[r12] + mov rax,QWORD PTR[rsi] + + xor r14,r14 + xor r15,r15 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD PTR[rcx] + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + add rdi,r11 + lea r15,QWORD PTR[4+r15] + adc rdx,0 + mov QWORD PTR[rsp],rdi + mov r13,rdx + jmp $L$1st4x +ALIGN 16 +$L$1st4x:: + mul rbx + add r10,rax + mov rax,QWORD PTR[((-16))+r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-8))+r15*8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[8+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[8+r15*8+rcx] + adc rdx,0 + lea r15,QWORD PTR[4+r15] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[((-16))+r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-32))+r15*8+rsp],rdi + mov r13,rdx + cmp r15,r9 + jb $L$1st4x + + mul rbx + add r10,rax + mov rax,QWORD PTR[((-16))+r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-8))+r15*8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov r13,rdx + + xor rdi,rdi + add r13,r10 + adc rdi,0 + mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov QWORD PTR[r15*8+rsp],rdi + + lea r14,QWORD PTR[1+r14] +ALIGN 4 +$L$outer4x:: + mov rbx,QWORD PTR[r14*8+r12] + xor r15,r15 + mov r10,QWORD PTR[rsp] + mov rbp,r8 + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[8+rcx] + adc rdx,0 + add r11,QWORD PTR[8+rsp] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + add rdi,r11 + lea r15,QWORD PTR[4+r15] + adc rdx,0 + mov QWORD PTR[rsp],rdi + mov r13,rdx + jmp $L$inner4x +ALIGN 16 +$L$inner4x:: + mul rbx + add r10,rax + mov rax,QWORD PTR[((-16))+r15*8+rcx] + adc rdx,0 + add r10,QWORD PTR[((-16))+r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-8))+r15*8+rcx] + adc rdx,0 + add r11,QWORD PTR[((-8))+r15*8+rsp] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + add r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[8+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[8+r15*8+rcx] + adc rdx,0 + add r11,QWORD PTR[8+r15*8+rsp] + adc rdx,0 + lea r15,QWORD PTR[4+r15] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[((-16))+r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-32))+r15*8+rsp],rdi + mov r13,rdx + cmp r15,r9 + jb $L$inner4x + + mul rbx + add r10,rax + mov rax,QWORD PTR[((-16))+r15*8+rcx] + adc rdx,0 + add r10,QWORD PTR[((-16))+r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-8))+r15*8+rcx] + adc rdx,0 + add r11,QWORD PTR[((-8))+r15*8+rsp] + adc rdx,0 + lea r14,QWORD PTR[1+r14] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov r13,rdx + + xor rdi,rdi + add r13,r10 + adc rdi,0 + add r13,QWORD PTR[r9*8+rsp] + adc rdi,0 + mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov QWORD PTR[r15*8+rsp],rdi + + cmp r14,r9 + jb $L$outer4x + mov rdi,QWORD PTR[16+r9*8+rsp] + mov rax,QWORD PTR[rsp] + mov rdx,QWORD PTR[8+rsp] + shr r9,2 + lea rsi,QWORD PTR[rsp] + xor r14,r14 + + sub rax,QWORD PTR[rcx] + mov rbx,QWORD PTR[16+rsi] + mov rbp,QWORD PTR[24+rsi] + sbb rdx,QWORD PTR[8+rcx] + lea r15,QWORD PTR[((-1))+r9] + jmp $L$sub4x +ALIGN 16 +$L$sub4x:: + mov QWORD PTR[r14*8+rdi],rax + mov QWORD PTR[8+r14*8+rdi],rdx + sbb rbx,QWORD PTR[16+r14*8+rcx] + mov rax,QWORD PTR[32+r14*8+rsi] + mov rdx,QWORD PTR[40+r14*8+rsi] + sbb rbp,QWORD PTR[24+r14*8+rcx] + mov QWORD PTR[16+r14*8+rdi],rbx + mov QWORD PTR[24+r14*8+rdi],rbp + sbb rax,QWORD PTR[32+r14*8+rcx] + mov rbx,QWORD PTR[48+r14*8+rsi] + mov rbp,QWORD PTR[56+r14*8+rsi] + sbb rdx,QWORD PTR[40+r14*8+rcx] + lea r14,QWORD PTR[4+r14] + dec r15 + jnz $L$sub4x + + mov QWORD PTR[r14*8+rdi],rax + mov rax,QWORD PTR[32+r14*8+rsi] + sbb rbx,QWORD PTR[16+r14*8+rcx] + mov QWORD PTR[8+r14*8+rdi],rdx + sbb rbp,QWORD PTR[24+r14*8+rcx] + mov QWORD PTR[16+r14*8+rdi],rbx + + sbb rax,0 +DB 66h, 48h, 0fh, 6eh, 0c0h + punpcklqdq xmm0,xmm0 + mov QWORD PTR[24+r14*8+rdi],rbp + xor r14,r14 + + mov r15,r9 + pxor xmm5,xmm5 + jmp $L$copy4x +ALIGN 16 +$L$copy4x:: + movdqu xmm2,XMMWORD PTR[r14*1+rsp] + movdqu xmm4,XMMWORD PTR[16+r14*1+rsp] + movdqu xmm1,XMMWORD PTR[r14*1+rdi] + movdqu xmm3,XMMWORD PTR[16+r14*1+rdi] + pxor xmm2,xmm1 + pxor xmm4,xmm3 + pand xmm2,xmm0 + pand xmm4,xmm0 + pxor xmm2,xmm1 + pxor xmm4,xmm3 + movdqu XMMWORD PTR[r14*1+rdi],xmm2 + movdqu XMMWORD PTR[16+r14*1+rdi],xmm4 + movdqa XMMWORD PTR[r14*1+rsp],xmm5 + movdqa XMMWORD PTR[16+r14*1+rsp],xmm5 + + lea r14,QWORD PTR[32+r14] + dec r15 + jnz $L$copy4x + + shl r9,2 + mov rsi,QWORD PTR[8+r9*8+rsp] + mov rax,1 + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$mul4x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul4x_mont:: +bn_mul4x_mont ENDP +EXTERN bn_sqr8x_internal:NEAR + + +ALIGN 32 +bn_sqr8x_mont PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_sqr8x_mont:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +$L$sqr8x_enter:: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r10d,r9d + shl r9d,3 + shl r10,3+2 + neg r9 + + + + + + + lea r11,QWORD PTR[((-64))+r9*4+rsp] + mov r8,QWORD PTR[r8] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb $L$sqr8x_sp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*4+rsp] + jmp $L$sqr8x_sp_done + +ALIGN 32 +$L$sqr8x_sp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*4] + lea rsp,QWORD PTR[((-64))+r9*4+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$sqr8x_sp_done:: + and rsp,-64 + mov r10,r9 + neg r9 + + lea r11,QWORD PTR[64+r9*2+rsp] + mov QWORD PTR[32+rsp],r8 + mov QWORD PTR[40+rsp],rax +$L$sqr8x_body:: + + mov rbp,r9 +DB 102,73,15,110,211 + shr rbp,3+2 + mov eax,DWORD PTR[((OPENSSL_ia32cap_P+8))] + jmp $L$sqr8x_copy_n + +ALIGN 32 +$L$sqr8x_copy_n:: + movq xmm0,QWORD PTR[rcx] + movq xmm1,QWORD PTR[8+rcx] + movq xmm3,QWORD PTR[16+rcx] + movq xmm4,QWORD PTR[24+rcx] + lea rcx,QWORD PTR[32+rcx] + movdqa XMMWORD PTR[r11],xmm0 + movdqa XMMWORD PTR[16+r11],xmm1 + movdqa XMMWORD PTR[32+r11],xmm3 + movdqa XMMWORD PTR[48+r11],xmm4 + lea r11,QWORD PTR[64+r11] + dec rbp + jnz $L$sqr8x_copy_n + + pxor xmm0,xmm0 +DB 102,72,15,110,207 +DB 102,73,15,110,218 + call bn_sqr8x_internal + + pxor xmm0,xmm0 + lea rax,QWORD PTR[48+rsp] + lea rdx,QWORD PTR[64+r9*2+rsp] + shr r9,3+2 + mov rsi,QWORD PTR[40+rsp] + jmp $L$sqr8x_zero + +ALIGN 32 +$L$sqr8x_zero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + movdqa XMMWORD PTR[32+rax],xmm0 + movdqa XMMWORD PTR[48+rax],xmm0 + lea rax,QWORD PTR[64+rax] + movdqa XMMWORD PTR[rdx],xmm0 + movdqa XMMWORD PTR[16+rdx],xmm0 + movdqa XMMWORD PTR[32+rdx],xmm0 + movdqa XMMWORD PTR[48+rdx],xmm0 + lea rdx,QWORD PTR[64+rdx] + dec r9 + jnz $L$sqr8x_zero + + mov rax,1 + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$sqr8x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_sqr8x_mont:: +bn_sqr8x_mont ENDP +DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83 +DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +DB 115,108,46,111,114,103,62,0 +ALIGN 16 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +mul_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + mov r10,QWORD PTR[192+r8] + mov rax,QWORD PTR[8+r10*8+rax] + lea rax,QWORD PTR[48+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + + jmp $L$common_seh_tail +mul_handler ENDP + + +ALIGN 16 +sqr_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + mov rax,QWORD PTR[40+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +sqr_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_bn_mul_mont + DD imagerel $L$SEH_end_bn_mul_mont + DD imagerel $L$SEH_info_bn_mul_mont + + DD imagerel $L$SEH_begin_bn_mul4x_mont + DD imagerel $L$SEH_end_bn_mul4x_mont + DD imagerel $L$SEH_info_bn_mul4x_mont + + DD imagerel $L$SEH_begin_bn_sqr8x_mont + DD imagerel $L$SEH_end_bn_sqr8x_mont + DD imagerel $L$SEH_info_bn_sqr8x_mont +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_bn_mul_mont:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$mul_body,imagerel $L$mul_epilogue +$L$SEH_info_bn_mul4x_mont:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue +$L$SEH_info_bn_sqr8x_mont:: +DB 9,0,0,0 + DD imagerel sqr_handler + DD imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue + +.xdata ENDS +END diff --git a/win-x86_64/crypto/bn/x86_64-mont5.asm b/win-x86_64/crypto/bn/x86_64-mont5.asm new file mode 100644 index 0000000..90c6100 --- /dev/null +++ b/win-x86_64/crypto/bn/x86_64-mont5.asm @@ -0,0 +1,2061 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC bn_mul_mont_gather5 + +ALIGN 64 +bn_mul_mont_gather5 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul_mont_gather5:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + test r9d,7 + jnz $L$mul_enter + jmp $L$mul4x_enter + +ALIGN 16 +$L$mul_enter:: + mov r9d,r9d + mov rax,rsp + mov r10d,DWORD PTR[56+rsp] + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-40))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + lea r11,QWORD PTR[2+r9] + neg r11 + lea rsp,QWORD PTR[r11*8+rsp] + and rsp,-1024 + + mov QWORD PTR[8+r9*8+rsp],rax +$L$mul_body:: + mov r12,rdx + mov r11,r10 + shr r10,3 + and r11,7 + not r10 + lea rax,QWORD PTR[$L$magic_masks] + and r10,3 + lea r12,QWORD PTR[96+r11*8+r12] + movq xmm4,QWORD PTR[r10*8+rax] + movq xmm5,QWORD PTR[8+r10*8+rax] + movq xmm6,QWORD PTR[16+r10*8+rax] + movq xmm7,QWORD PTR[24+r10*8+rax] + + movq xmm0,QWORD PTR[(((-96)))+r12] + movq xmm1,QWORD PTR[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+r12] + pand xmm1,xmm5 + movq xmm3,QWORD PTR[96+r12] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + por xmm0,xmm2 + lea r12,QWORD PTR[256+r12] + por xmm0,xmm3 + +DB 102,72,15,126,195 + + mov r8,QWORD PTR[r8] + mov rax,QWORD PTR[rsi] + + xor r14,r14 + xor r15,r15 + + movq xmm0,QWORD PTR[(((-96)))+r12] + movq xmm1,QWORD PTR[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+r12] + pand xmm1,xmm5 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD PTR[rcx] + + movq xmm3,QWORD PTR[96+r12] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + + imul rbp,r10 + mov r11,rdx + + por xmm0,xmm2 + lea r12,QWORD PTR[256+r12] + por xmm0,xmm3 + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov r13,rdx + + lea r15,QWORD PTR[1+r15] + jmp $L$1st_enter + +ALIGN 16 +$L$1st:: + add r13,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add r13,r11 + mov r11,r10 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$1st_enter:: + mul rbx + add r11,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + lea r15,QWORD PTR[1+r15] + mov r10,rdx + + mul rbp + cmp r15,r9 + jne $L$1st + +DB 102,72,15,126,195 + + add r13,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r13,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + mov r11,r10 + + xor rdx,rdx + add r13,r11 + adc rdx,0 + mov QWORD PTR[((-8))+r9*8+rsp],r13 + mov QWORD PTR[r9*8+rsp],rdx + + lea r14,QWORD PTR[1+r14] + jmp $L$outer +ALIGN 16 +$L$outer:: + xor r15,r15 + mov rbp,r8 + mov r10,QWORD PTR[rsp] + + movq xmm0,QWORD PTR[(((-96)))+r12] + movq xmm1,QWORD PTR[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+r12] + pand xmm1,xmm5 + + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + + movq xmm3,QWORD PTR[96+r12] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + + imul rbp,r10 + mov r11,rdx + + por xmm0,xmm2 + lea r12,QWORD PTR[256+r12] + por xmm0,xmm3 + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov r10,QWORD PTR[8+rsp] + mov r13,rdx + + lea r15,QWORD PTR[1+r15] + jmp $L$inner_enter + +ALIGN 16 +$L$inner:: + add r13,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$inner_enter:: + mul rbx + add r11,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + lea r15,QWORD PTR[1+r15] + + mul rbp + cmp r15,r9 + jne $L$inner + +DB 102,72,15,126,195 + + add r13,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + + xor rdx,rdx + add r13,r11 + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r9*8+rsp],r13 + mov QWORD PTR[r9*8+rsp],rdx + + lea r14,QWORD PTR[1+r14] + cmp r14,r9 + jb $L$outer + + xor r14,r14 + mov rax,QWORD PTR[rsp] + lea rsi,QWORD PTR[rsp] + mov r15,r9 + jmp $L$sub +ALIGN 16 +$L$sub:: sbb rax,QWORD PTR[r14*8+rcx] + mov QWORD PTR[r14*8+rdi],rax + mov rax,QWORD PTR[8+r14*8+rsi] + lea r14,QWORD PTR[1+r14] + dec r15 + jnz $L$sub + + sbb rax,0 + xor r14,r14 + mov r15,r9 +ALIGN 16 +$L$copy:: + mov rsi,QWORD PTR[r14*8+rsp] + mov rcx,QWORD PTR[r14*8+rdi] + xor rsi,rcx + and rsi,rax + xor rsi,rcx + mov QWORD PTR[r14*8+rsp],r14 + mov QWORD PTR[r14*8+rdi],rsi + lea r14,QWORD PTR[1+r14] + sub r15,1 + jnz $L$copy + + mov rsi,QWORD PTR[8+r9*8+rsp] + mov rax,1 + movaps xmm6,XMMWORD PTR[((-88))+rsi] + movaps xmm7,XMMWORD PTR[((-72))+rsi] + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$mul_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul_mont_gather5:: +bn_mul_mont_gather5 ENDP + +ALIGN 32 +bn_mul4x_mont_gather5 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul4x_mont_gather5:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +$L$mul4x_enter:: +DB 067h + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-40))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 +DB 067h + mov r10d,r9d + shl r9d,3 + shl r10d,3+2 + neg r9 + + + + + + + + + lea r11,QWORD PTR[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb $L$mul4xsp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + jmp $L$mul4xsp_done + +ALIGN 32 +$L$mul4xsp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*2] + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$mul4xsp_done:: + and rsp,-64 + neg r9 + + mov QWORD PTR[40+rsp],rax +$L$mul4x_body:: + + call mul4x_internal + + mov rsi,QWORD PTR[40+rsp] + mov rax,1 + movaps xmm6,XMMWORD PTR[((-88))+rsi] + movaps xmm7,XMMWORD PTR[((-72))+rsi] + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$mul4x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul4x_mont_gather5:: +bn_mul4x_mont_gather5 ENDP + + +ALIGN 32 +mul4x_internal PROC PRIVATE + shl r9,5 + mov r10d,DWORD PTR[56+rax] + lea r13,QWORD PTR[256+r9*1+rdx] + shr r9,5 + mov r11,r10 + shr r10,3 + and r11,7 + not r10 + lea rax,QWORD PTR[$L$magic_masks] + and r10,3 + lea r12,QWORD PTR[96+r11*8+rdx] + movq xmm4,QWORD PTR[r10*8+rax] + movq xmm5,QWORD PTR[8+r10*8+rax] + add r11,7 + movq xmm6,QWORD PTR[16+r10*8+rax] + movq xmm7,QWORD PTR[24+r10*8+rax] + and r11,7 + + movq xmm0,QWORD PTR[(((-96)))+r12] + lea r14,QWORD PTR[256+r12] + movq xmm1,QWORD PTR[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+r12] + pand xmm1,xmm5 + movq xmm3,QWORD PTR[96+r12] + pand xmm2,xmm6 +DB 067h + por xmm0,xmm1 + movq xmm1,QWORD PTR[((-96))+r14] +DB 067h + pand xmm3,xmm7 +DB 067h + por xmm0,xmm2 + movq xmm2,QWORD PTR[((-32))+r14] +DB 067h + pand xmm1,xmm4 +DB 067h + por xmm0,xmm3 + movq xmm3,QWORD PTR[32+r14] + +DB 102,72,15,126,195 + movq xmm0,QWORD PTR[96+r14] + mov QWORD PTR[((16+8))+rsp],r13 + mov QWORD PTR[((56+8))+rsp],rdi + + mov r8,QWORD PTR[r8] + mov rax,QWORD PTR[rsi] + lea rsi,QWORD PTR[r9*1+rsi] + neg r9 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD PTR[rcx] + + pand xmm2,xmm5 + pand xmm3,xmm6 + por xmm1,xmm2 + + imul rbp,r10 + + + + + + + + lea r14,QWORD PTR[((64+8))+r11*8+rsp] + mov r11,rdx + + pand xmm0,xmm7 + por xmm1,xmm3 + lea r12,QWORD PTR[512+r12] + por xmm0,xmm1 + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+r9*1+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[16+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+r9*1+rsi] + adc rdx,0 + add rdi,r11 + lea r15,QWORD PTR[32+r9] + lea rcx,QWORD PTR[64+rcx] + adc rdx,0 + mov QWORD PTR[r14],rdi + mov r13,rdx + jmp $L$1st4x + +ALIGN 32 +$L$1st4x:: + mul rbx + add r10,rax + mov rax,QWORD PTR[((-32))+rcx] + lea r14,QWORD PTR[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r14],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-16))+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r15*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r14],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[8+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r14],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[16+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+r15*1+rsi] + adc rdx,0 + add rdi,r11 + lea rcx,QWORD PTR[64+rcx] + adc rdx,0 + mov QWORD PTR[r14],rdi + mov r13,rdx + + add r15,32 + jnz $L$1st4x + + mul rbx + add r10,rax + mov rax,QWORD PTR[((-32))+rcx] + lea r14,QWORD PTR[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r14],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-16))+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r9*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r14],rdi + mov r13,rdx + +DB 102,72,15,126,195 + lea rcx,QWORD PTR[r9*2+rcx] + + xor rdi,rdi + add r13,r10 + adc rdi,0 + mov QWORD PTR[((-8))+r14],r13 + + jmp $L$outer4x + +ALIGN 32 +$L$outer4x:: + mov r10,QWORD PTR[r9*1+r14] + mov rbp,r8 + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + + movq xmm0,QWORD PTR[(((-96)))+r12] + movq xmm1,QWORD PTR[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+r12] + pand xmm1,xmm5 + movq xmm3,QWORD PTR[96+r12] + + imul rbp,r10 +DB 067h + mov r11,rdx + mov QWORD PTR[r14],rdi + + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + por xmm0,xmm2 + lea r14,QWORD PTR[r9*1+r14] + lea r12,QWORD PTR[256+r12] + por xmm0,xmm3 + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+r9*1+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[16+rcx] + adc rdx,0 + add r11,QWORD PTR[8+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+r9*1+rsi] + adc rdx,0 + add rdi,r11 + lea r15,QWORD PTR[32+r9] + lea rcx,QWORD PTR[64+rcx] + adc rdx,0 + mov r13,rdx + jmp $L$inner4x + +ALIGN 32 +$L$inner4x:: + mul rbx + add r10,rax + mov rax,QWORD PTR[((-32))+rcx] + adc rdx,0 + add r10,QWORD PTR[16+r14] + lea r14,QWORD PTR[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-32))+r14],rdi + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-16))+rcx] + adc rdx,0 + add r11,QWORD PTR[((-8))+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r15*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-24))+r14],r13 + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + add r10,QWORD PTR[r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[8+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-16))+r14],rdi + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[16+rcx] + adc rdx,0 + add r11,QWORD PTR[8+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+r15*1+rsi] + adc rdx,0 + add rdi,r11 + lea rcx,QWORD PTR[64+rcx] + adc rdx,0 + mov QWORD PTR[((-8))+r14],r13 + mov r13,rdx + + add r15,32 + jnz $L$inner4x + + mul rbx + add r10,rax + mov rax,QWORD PTR[((-32))+rcx] + adc rdx,0 + add r10,QWORD PTR[16+r14] + lea r14,QWORD PTR[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-32))+r14],rdi + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,rbp + mov rbp,QWORD PTR[((-16))+rcx] + adc rdx,0 + add r11,QWORD PTR[((-8))+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r9*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-24))+r14],r13 + mov r13,rdx + +DB 102,72,15,126,195 + mov QWORD PTR[((-16))+r14],rdi + lea rcx,QWORD PTR[r9*2+rcx] + + xor rdi,rdi + add r13,r10 + adc rdi,0 + add r13,QWORD PTR[r14] + adc rdi,0 + mov QWORD PTR[((-8))+r14],r13 + + cmp r12,QWORD PTR[((16+8))+rsp] + jb $L$outer4x + sub rbp,r13 + adc r15,r15 + or rdi,r15 + xor rdi,1 + lea rbx,QWORD PTR[r9*1+r14] + lea rbp,QWORD PTR[rdi*8+rcx] + mov rcx,r9 + sar rcx,3+2 + mov rdi,QWORD PTR[((56+8))+rsp] + jmp $L$sqr4x_sub +mul4x_internal ENDP +PUBLIC bn_power5 + +ALIGN 32 +bn_power5 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_power5:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-40))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + mov r10d,r9d + shl r9d,3 + shl r10d,3+2 + neg r9 + mov r8,QWORD PTR[r8] + + + + + + + + lea r11,QWORD PTR[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb $L$pwr_sp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + jmp $L$pwr_sp_done + +ALIGN 32 +$L$pwr_sp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*2] + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$pwr_sp_done:: + and rsp,-64 + mov r10,r9 + neg r9 + + + + + + + + + + + mov QWORD PTR[32+rsp],r8 + mov QWORD PTR[40+rsp],rax +$L$power5_body:: +DB 102,72,15,110,207 +DB 102,72,15,110,209 +DB 102,73,15,110,218 +DB 102,72,15,110,226 + + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + +DB 102,72,15,126,209 +DB 102,72,15,126,226 + mov rdi,rsi + mov rax,QWORD PTR[40+rsp] + lea r8,QWORD PTR[32+rsp] + + call mul4x_internal + + mov rsi,QWORD PTR[40+rsp] + mov rax,1 + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$power5_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_power5:: +bn_power5 ENDP + +PUBLIC bn_sqr8x_internal + + +ALIGN 32 +bn_sqr8x_internal PROC PUBLIC +__bn_sqr8x_internal:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lea rbp,QWORD PTR[32+r10] + lea rsi,QWORD PTR[r9*1+rsi] + + mov rcx,r9 + + + mov r14,QWORD PTR[((-32))+rbp*1+rsi] + lea rdi,QWORD PTR[((48+8))+r9*2+rsp] + mov rax,QWORD PTR[((-24))+rbp*1+rsi] + lea rdi,QWORD PTR[((-32))+rbp*1+rdi] + mov rbx,QWORD PTR[((-16))+rbp*1+rsi] + mov r15,rax + + mul r14 + mov r10,rax + mov rax,rbx + mov r11,rdx + mov QWORD PTR[((-24))+rbp*1+rdi],r10 + + mul r14 + add r11,rax + mov rax,rbx + adc rdx,0 + mov QWORD PTR[((-16))+rbp*1+rdi],r11 + mov r10,rdx + + + mov rbx,QWORD PTR[((-8))+rbp*1+rsi] + mul r15 + mov r12,rax + mov rax,rbx + mov r13,rdx + + lea rcx,QWORD PTR[rbp] + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD PTR[((-8))+rcx*1+rdi],r10 + jmp $L$sqr4x_1st + +ALIGN 32 +$L$sqr4x_1st:: + mov rbx,QWORD PTR[rcx*1+rsi] + mul r15 + add r13,rax + mov rax,rbx + mov r12,rdx + adc r12,0 + + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD PTR[8+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD PTR[rcx*1+rdi],r11 + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + mov rbx,QWORD PTR[16+rcx*1+rsi] + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + + mul r15 + add r13,rax + mov rax,rbx + mov QWORD PTR[8+rcx*1+rdi],r10 + mov r12,rdx + adc r12,0 + + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD PTR[24+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD PTR[16+rcx*1+rdi],r11 + mov r13,rdx + adc r13,0 + lea rcx,QWORD PTR[32+rcx] + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD PTR[((-8))+rcx*1+rdi],r10 + + cmp rcx,0 + jne $L$sqr4x_1st + + mul r15 + add r13,rax + lea rbp,QWORD PTR[16+rbp] + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD PTR[rdi],r13 + mov r12,rdx + mov QWORD PTR[8+rdi],rdx + jmp $L$sqr4x_outer + +ALIGN 32 +$L$sqr4x_outer:: + mov r14,QWORD PTR[((-32))+rbp*1+rsi] + lea rdi,QWORD PTR[((48+8))+r9*2+rsp] + mov rax,QWORD PTR[((-24))+rbp*1+rsi] + lea rdi,QWORD PTR[((-32))+rbp*1+rdi] + mov rbx,QWORD PTR[((-16))+rbp*1+rsi] + mov r15,rax + + mul r14 + mov r10,QWORD PTR[((-24))+rbp*1+rdi] + add r10,rax + mov rax,rbx + adc rdx,0 + mov QWORD PTR[((-24))+rbp*1+rdi],r10 + mov r11,rdx + + mul r14 + add r11,rax + mov rax,rbx + adc rdx,0 + add r11,QWORD PTR[((-16))+rbp*1+rdi] + mov r10,rdx + adc r10,0 + mov QWORD PTR[((-16))+rbp*1+rdi],r11 + + xor r12,r12 + + mov rbx,QWORD PTR[((-8))+rbp*1+rsi] + mul r15 + add r12,rax + mov rax,rbx + adc rdx,0 + add r12,QWORD PTR[((-8))+rbp*1+rdi] + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + adc rdx,0 + add r10,r12 + mov r11,rdx + adc r11,0 + mov QWORD PTR[((-8))+rbp*1+rdi],r10 + + lea rcx,QWORD PTR[rbp] + jmp $L$sqr4x_inner + +ALIGN 32 +$L$sqr4x_inner:: + mov rbx,QWORD PTR[rcx*1+rsi] + mul r15 + add r13,rax + mov rax,rbx + mov r12,rdx + adc r12,0 + add r13,QWORD PTR[rcx*1+rdi] + adc r12,0 + +DB 067h + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD PTR[8+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + mul r15 + add r12,rax + mov QWORD PTR[rcx*1+rdi],r11 + mov rax,rbx + mov r13,rdx + adc r13,0 + add r12,QWORD PTR[8+rcx*1+rdi] + lea rcx,QWORD PTR[16+rcx] + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + adc rdx,0 + add r10,r12 + mov r11,rdx + adc r11,0 + mov QWORD PTR[((-8))+rcx*1+rdi],r10 + + cmp rcx,0 + jne $L$sqr4x_inner + +DB 067h + mul r15 + add r13,rax + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD PTR[rdi],r13 + mov r12,rdx + mov QWORD PTR[8+rdi],rdx + + add rbp,16 + jnz $L$sqr4x_outer + + + mov r14,QWORD PTR[((-32))+rsi] + lea rdi,QWORD PTR[((48+8))+r9*2+rsp] + mov rax,QWORD PTR[((-24))+rsi] + lea rdi,QWORD PTR[((-32))+rbp*1+rdi] + mov rbx,QWORD PTR[((-16))+rsi] + mov r15,rax + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + + mul r14 + add r11,rax + mov rax,rbx + mov QWORD PTR[((-24))+rdi],r10 + mov r10,rdx + adc r10,0 + add r11,r13 + mov rbx,QWORD PTR[((-8))+rsi] + adc r10,0 + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD PTR[((-16))+rdi],r11 + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD PTR[((-8))+rdi],r10 + + mul r15 + add r13,rax + mov rax,QWORD PTR[((-16))+rsi] + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD PTR[rdi],r13 + mov r12,rdx + mov QWORD PTR[8+rdi],rdx + + mul rbx + add rbp,16 + xor r14,r14 + sub rbp,r9 + xor r15,r15 + + add rax,r12 + adc rdx,0 + mov QWORD PTR[8+rdi],rax + mov QWORD PTR[16+rdi],rdx + mov QWORD PTR[24+rdi],r15 + + mov rax,QWORD PTR[((-16))+rbp*1+rsi] + lea rdi,QWORD PTR[((48+8))+rsp] + xor r10,r10 + mov r11,QWORD PTR[8+rdi] + + lea r12,QWORD PTR[r10*2+r14] + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[16+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[24+rdi] + adc r12,rax + mov rax,QWORD PTR[((-8))+rbp*1+rsi] + mov QWORD PTR[rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[8+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD PTR[32+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[40+rdi] + adc rbx,rax + mov rax,QWORD PTR[rbp*1+rsi] + mov QWORD PTR[16+rdi],rbx + adc r8,rdx + lea rbp,QWORD PTR[16+rbp] + mov QWORD PTR[24+rdi],r8 + sbb r15,r15 + lea rdi,QWORD PTR[64+rdi] + jmp $L$sqr4x_shift_n_add + +ALIGN 32 +$L$sqr4x_shift_n_add:: + lea r12,QWORD PTR[r10*2+r14] + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[((-16))+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[((-8))+rdi] + adc r12,rax + mov rax,QWORD PTR[((-8))+rbp*1+rsi] + mov QWORD PTR[((-32))+rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[((-24))+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD PTR[rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[8+rdi] + adc rbx,rax + mov rax,QWORD PTR[rbp*1+rsi] + mov QWORD PTR[((-16))+rdi],rbx + adc r8,rdx + + lea r12,QWORD PTR[r10*2+r14] + mov QWORD PTR[((-8))+rdi],r8 + sbb r15,r15 + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[16+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[24+rdi] + adc r12,rax + mov rax,QWORD PTR[8+rbp*1+rsi] + mov QWORD PTR[rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[8+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD PTR[32+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[40+rdi] + adc rbx,rax + mov rax,QWORD PTR[16+rbp*1+rsi] + mov QWORD PTR[16+rdi],rbx + adc r8,rdx + mov QWORD PTR[24+rdi],r8 + sbb r15,r15 + lea rdi,QWORD PTR[64+rdi] + add rbp,32 + jnz $L$sqr4x_shift_n_add + + lea r12,QWORD PTR[r10*2+r14] +DB 067h + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[((-16))+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[((-8))+rdi] + adc r12,rax + mov rax,QWORD PTR[((-8))+rsi] + mov QWORD PTR[((-32))+rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[((-24))+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mul rax + neg r15 + adc rbx,rax + adc r8,rdx + mov QWORD PTR[((-16))+rdi],rbx + mov QWORD PTR[((-8))+rdi],r8 +DB 102,72,15,126,213 +sqr8x_reduction:: + xor rax,rax + lea rcx,QWORD PTR[r9*2+rbp] + lea rdx,QWORD PTR[((48+8))+r9*2+rsp] + mov QWORD PTR[((0+8))+rsp],rcx + lea rdi,QWORD PTR[((48+8))+r9*1+rsp] + mov QWORD PTR[((8+8))+rsp],rdx + neg r9 + jmp $L$8x_reduction_loop + +ALIGN 32 +$L$8x_reduction_loop:: + lea rdi,QWORD PTR[r9*1+rdi] +DB 066h + mov rbx,QWORD PTR[rdi] + mov r9,QWORD PTR[8+rdi] + mov r10,QWORD PTR[16+rdi] + mov r11,QWORD PTR[24+rdi] + mov r12,QWORD PTR[32+rdi] + mov r13,QWORD PTR[40+rdi] + mov r14,QWORD PTR[48+rdi] + mov r15,QWORD PTR[56+rdi] + mov QWORD PTR[rdx],rax + lea rdi,QWORD PTR[64+rdi] + +DB 067h + mov r8,rbx + imul rbx,QWORD PTR[((32+8))+rsp] + mov rax,QWORD PTR[rbp] + mov ecx,8 + jmp $L$8x_reduce + +ALIGN 32 +$L$8x_reduce:: + mul rbx + mov rax,QWORD PTR[16+rbp] + neg r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[32+rbp] + adc rdx,0 + add r8,r9 + mov QWORD PTR[((48-8+8))+rcx*8+rsp],rbx + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[48+rbp] + adc rdx,0 + add r9,r10 + mov rsi,QWORD PTR[((32+8))+rsp] + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[64+rbp] + adc rdx,0 + imul rsi,r8 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[80+rbp] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[96+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[112+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,rsi + add r15,rax + mov rax,QWORD PTR[rbp] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + dec ecx + jnz $L$8x_reduce + + lea rbp,QWORD PTR[128+rbp] + xor rax,rax + mov rdx,QWORD PTR[((8+8))+rsp] + cmp rbp,QWORD PTR[((0+8))+rsp] + jae $L$8x_no_tail + +DB 066h + add r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + sbb rsi,rsi + + mov rbx,QWORD PTR[((48+56+8))+rsp] + mov ecx,8 + mov rax,QWORD PTR[rbp] + jmp $L$8x_tail + +ALIGN 32 +$L$8x_tail:: + mul rbx + add r8,rax + mov rax,QWORD PTR[16+rbp] + mov QWORD PTR[rdi],r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[32+rbp] + adc rdx,0 + add r8,r9 + lea rdi,QWORD PTR[8+rdi] + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[48+rbp] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[64+rbp] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[80+rbp] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[96+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[112+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,QWORD PTR[((48-16+8))+rcx*8+rsp] + add r15,rax + adc rdx,0 + add r14,r15 + mov rax,QWORD PTR[rbp] + mov r15,rdx + adc r15,0 + + dec ecx + jnz $L$8x_tail + + lea rbp,QWORD PTR[128+rbp] + mov rdx,QWORD PTR[((8+8))+rsp] + cmp rbp,QWORD PTR[((0+8))+rsp] + jae $L$8x_tail_done + + mov rbx,QWORD PTR[((48+56+8))+rsp] + neg rsi + mov rax,QWORD PTR[rbp] + adc r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + sbb rsi,rsi + + mov ecx,8 + jmp $L$8x_tail + +ALIGN 32 +$L$8x_tail_done:: + add r8,QWORD PTR[rdx] + xor rax,rax + + neg rsi +$L$8x_no_tail:: + adc r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + adc rax,0 + mov rcx,QWORD PTR[((-16))+rbp] + xor rsi,rsi + +DB 102,72,15,126,213 + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 +DB 102,73,15,126,217 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + lea rdi,QWORD PTR[64+rdi] + + cmp rdi,rdx + jb $L$8x_reduction_loop + + sub rcx,r15 + lea rbx,QWORD PTR[r9*1+rdi] + adc rsi,rsi + mov rcx,r9 + or rax,rsi +DB 102,72,15,126,207 + xor rax,1 +DB 102,72,15,126,206 + lea rbp,QWORD PTR[rax*8+rbp] + sar rcx,3+2 + jmp $L$sqr4x_sub + +ALIGN 32 +$L$sqr4x_sub:: +DB 066h + mov r12,QWORD PTR[rbx] + mov r13,QWORD PTR[8+rbx] + sbb r12,QWORD PTR[rbp] + mov r14,QWORD PTR[16+rbx] + sbb r13,QWORD PTR[16+rbp] + mov r15,QWORD PTR[24+rbx] + lea rbx,QWORD PTR[32+rbx] + sbb r14,QWORD PTR[32+rbp] + mov QWORD PTR[rdi],r12 + sbb r15,QWORD PTR[48+rbp] + lea rbp,QWORD PTR[64+rbp] + mov QWORD PTR[8+rdi],r13 + mov QWORD PTR[16+rdi],r14 + mov QWORD PTR[24+rdi],r15 + lea rdi,QWORD PTR[32+rdi] + + inc rcx + jnz $L$sqr4x_sub + mov r10,r9 + neg r9 + DB 0F3h,0C3h ;repret +bn_sqr8x_internal ENDP +PUBLIC bn_from_montgomery + +ALIGN 32 +bn_from_montgomery PROC PUBLIC + test DWORD PTR[48+rsp],7 + jz bn_from_mont8x + xor eax,eax + DB 0F3h,0C3h ;repret +bn_from_montgomery ENDP + + +ALIGN 32 +bn_from_mont8x PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_from_mont8x:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +DB 067h + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-40))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 +DB 067h + mov r10d,r9d + shl r9d,3 + shl r10d,3+2 + neg r9 + mov r8,QWORD PTR[r8] + + + + + + + + lea r11,QWORD PTR[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb $L$from_sp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + jmp $L$from_sp_done + +ALIGN 32 +$L$from_sp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*2] + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$from_sp_done:: + and rsp,-64 + mov r10,r9 + neg r9 + + + + + + + + + + + mov QWORD PTR[32+rsp],r8 + mov QWORD PTR[40+rsp],rax +$L$from_body:: + mov r11,r9 + lea rax,QWORD PTR[48+rsp] + pxor xmm0,xmm0 + jmp $L$mul_by_1 + +ALIGN 32 +$L$mul_by_1:: + movdqu xmm1,XMMWORD PTR[rsi] + movdqu xmm2,XMMWORD PTR[16+rsi] + movdqu xmm3,XMMWORD PTR[32+rsi] + movdqa XMMWORD PTR[r9*1+rax],xmm0 + movdqu xmm4,XMMWORD PTR[48+rsi] + movdqa XMMWORD PTR[16+r9*1+rax],xmm0 +DB 048h,08dh,0b6h,040h,000h,000h,000h + movdqa XMMWORD PTR[rax],xmm1 + movdqa XMMWORD PTR[32+r9*1+rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm2 + movdqa XMMWORD PTR[48+r9*1+rax],xmm0 + movdqa XMMWORD PTR[32+rax],xmm3 + movdqa XMMWORD PTR[48+rax],xmm4 + lea rax,QWORD PTR[64+rax] + sub r11,64 + jnz $L$mul_by_1 + +DB 102,72,15,110,207 +DB 102,72,15,110,209 +DB 067h + mov rbp,rcx +DB 102,73,15,110,218 + call sqr8x_reduction + + pxor xmm0,xmm0 + lea rax,QWORD PTR[48+rsp] + mov rsi,QWORD PTR[40+rsp] + jmp $L$from_mont_zero + +ALIGN 32 +$L$from_mont_zero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + movdqa XMMWORD PTR[32+rax],xmm0 + movdqa XMMWORD PTR[48+rax],xmm0 + lea rax,QWORD PTR[64+rax] + sub r9,32 + jnz $L$from_mont_zero + + mov rax,1 + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$from_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_from_mont8x:: +bn_from_mont8x ENDP +PUBLIC bn_scatter5 + +ALIGN 16 +bn_scatter5 PROC PUBLIC + cmp edx,0 + jz $L$scatter_epilogue + lea r8,QWORD PTR[r9*8+r8] +$L$scatter:: + mov rax,QWORD PTR[rcx] + lea rcx,QWORD PTR[8+rcx] + mov QWORD PTR[r8],rax + lea r8,QWORD PTR[256+r8] + sub edx,1 + jnz $L$scatter +$L$scatter_epilogue:: + DB 0F3h,0C3h ;repret +bn_scatter5 ENDP + +PUBLIC bn_gather5 + +ALIGN 16 +bn_gather5 PROC PUBLIC +$L$SEH_begin_bn_gather5:: + +DB 048h,083h,0ech,028h +DB 00fh,029h,034h,024h +DB 00fh,029h,07ch,024h,010h + mov r11d,r9d + shr r9d,3 + and r11,7 + not r9d + lea rax,QWORD PTR[$L$magic_masks] + and r9d,3 + lea r8,QWORD PTR[128+r11*8+r8] + movq xmm4,QWORD PTR[r9*8+rax] + movq xmm5,QWORD PTR[8+r9*8+rax] + movq xmm6,QWORD PTR[16+r9*8+rax] + movq xmm7,QWORD PTR[24+r9*8+rax] + jmp $L$gather +ALIGN 16 +$L$gather:: + movq xmm0,QWORD PTR[(((-128)))+r8] + movq xmm1,QWORD PTR[((-64))+r8] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[r8] + pand xmm1,xmm5 + movq xmm3,QWORD PTR[64+r8] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 +DB 067h,067h + por xmm0,xmm2 + lea r8,QWORD PTR[256+r8] + por xmm0,xmm3 + + movq QWORD PTR[rcx],xmm0 + lea rcx,QWORD PTR[8+rcx] + sub edx,1 + jnz $L$gather + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + lea rsp,QWORD PTR[40+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_gather5:: +bn_gather5 ENDP +ALIGN 64 +$L$magic_masks:: + DD 0,0,0,0,0,0,-1,-1 + DD 0,0,0,0,0,0,0,0 +DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115 +DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111 +DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79 +DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111 +DB 112,101,110,115,115,108,46,111,114,103,62,0 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +mul_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea r10,QWORD PTR[$L$mul_epilogue] + cmp rbx,r10 + jb $L$body_40 + + mov r10,QWORD PTR[192+r8] + mov rax,QWORD PTR[8+r10*8+rax] + jmp $L$body_proceed + +$L$body_40:: + mov rax,QWORD PTR[40+rax] +$L$body_proceed:: + + movaps xmm0,XMMWORD PTR[((-88))+rax] + movaps xmm1,XMMWORD PTR[((-72))+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + movups XMMWORD PTR[512+r8],xmm0 + movups XMMWORD PTR[528+r8],xmm1 + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +mul_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_bn_mul_mont_gather5 + DD imagerel $L$SEH_end_bn_mul_mont_gather5 + DD imagerel $L$SEH_info_bn_mul_mont_gather5 + + DD imagerel $L$SEH_begin_bn_mul4x_mont_gather5 + DD imagerel $L$SEH_end_bn_mul4x_mont_gather5 + DD imagerel $L$SEH_info_bn_mul4x_mont_gather5 + + DD imagerel $L$SEH_begin_bn_power5 + DD imagerel $L$SEH_end_bn_power5 + DD imagerel $L$SEH_info_bn_power5 + + DD imagerel $L$SEH_begin_bn_from_mont8x + DD imagerel $L$SEH_end_bn_from_mont8x + DD imagerel $L$SEH_info_bn_from_mont8x + DD imagerel $L$SEH_begin_bn_gather5 + DD imagerel $L$SEH_end_bn_gather5 + DD imagerel $L$SEH_info_bn_gather5 + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_bn_mul_mont_gather5:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$mul_body,imagerel $L$mul_epilogue +ALIGN 8 +$L$SEH_info_bn_mul4x_mont_gather5:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue +ALIGN 8 +$L$SEH_info_bn_power5:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$power5_body,imagerel $L$power5_epilogue +ALIGN 8 +$L$SEH_info_bn_from_mont8x:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$from_body,imagerel $L$from_epilogue +ALIGN 8 +$L$SEH_info_bn_gather5:: +DB 001h,00dh,005h,000h +DB 00dh,078h,001h,000h +DB 008h,068h,000h,000h +DB 004h,042h,000h,000h +ALIGN 8 + +.xdata ENDS +END |