%ifidn __OUTPUT_FORMAT__,obj section code use32 class=code align=64 %elifidn __OUTPUT_FORMAT__,win32 %ifdef __YASM_VERSION_ID__ %if __YASM_VERSION_ID__ < 01010000h %error yasm version 1.1.0 or later needed. %endif ; Yasm automatically includes .00 and complains about redefining it. ; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html %else $@feat.00 equ 1 %endif section .text code align=64 %else section .text code %endif ;extern _OPENSSL_ia32cap_P global _bn_mul_mont align 16 _bn_mul_mont: L$_bn_mul_mont_begin: push ebp push ebx push esi push edi xor eax,eax mov edi,DWORD [40+esp] cmp edi,4 jl NEAR L$000just_leave lea esi,[20+esp] lea edx,[24+esp] mov ebp,esp add edi,2 neg edi lea esp,[edi*4+esp-32] neg edi mov eax,esp sub eax,edx and eax,2047 sub esp,eax xor edx,esp and edx,2048 xor edx,2048 sub esp,edx and esp,-64 mov eax,DWORD [esi] mov ebx,DWORD [4+esi] mov ecx,DWORD [8+esi] mov edx,DWORD [12+esi] mov esi,DWORD [16+esi] mov esi,DWORD [esi] mov DWORD [4+esp],eax mov DWORD [8+esp],ebx mov DWORD [12+esp],ecx mov DWORD [16+esp],edx mov DWORD [20+esp],esi lea ebx,[edi-3] mov DWORD [24+esp],ebp lea eax,[_OPENSSL_ia32cap_P] bt DWORD [eax],26 jnc NEAR L$001non_sse2 mov eax,-1 movd mm7,eax mov esi,DWORD [8+esp] mov edi,DWORD [12+esp] mov ebp,DWORD [16+esp] xor edx,edx xor ecx,ecx movd mm4,DWORD [edi] movd mm5,DWORD [esi] movd mm3,DWORD [ebp] pmuludq mm5,mm4 movq mm2,mm5 movq mm0,mm5 pand mm0,mm7 pmuludq mm5,[20+esp] pmuludq mm3,mm5 paddq mm3,mm0 movd mm1,DWORD [4+ebp] movd mm0,DWORD [4+esi] psrlq mm2,32 psrlq mm3,32 inc ecx align 16 L$0021st: pmuludq mm0,mm4 pmuludq mm1,mm5 paddq mm2,mm0 paddq mm3,mm1 movq mm0,mm2 pand mm0,mm7 movd mm1,DWORD [4+ecx*4+ebp] paddq mm3,mm0 movd mm0,DWORD [4+ecx*4+esi] psrlq mm2,32 movd DWORD [28+ecx*4+esp],mm3 psrlq mm3,32 lea ecx,[1+ecx] cmp ecx,ebx jl NEAR L$0021st pmuludq mm0,mm4 pmuludq mm1,mm5 paddq mm2,mm0 paddq mm3,mm1 movq mm0,mm2 pand mm0,mm7 paddq mm3,mm0 movd DWORD [28+ecx*4+esp],mm3 psrlq mm2,32 psrlq mm3,32 paddq mm3,mm2 movq [32+ebx*4+esp],mm3 inc edx L$003outer: xor ecx,ecx movd mm4,DWORD [edx*4+edi] movd mm5,DWORD [esi] movd mm6,DWORD [32+esp] movd mm3,DWORD [ebp] pmuludq mm5,mm4 paddq mm5,mm6 movq mm0,mm5 movq mm2,mm5 pand mm0,mm7 pmuludq mm5,[20+esp] pmuludq mm3,mm5 paddq mm3,mm0 movd mm6,DWORD [36+esp] movd mm1,DWORD [4+ebp] movd mm0,DWORD [4+esi] psrlq mm2,32 psrlq mm3,32 paddq mm2,mm6 inc ecx dec ebx L$004inner: pmuludq mm0,mm4 pmuludq mm1,mm5 paddq mm2,mm0 paddq mm3,mm1 movq mm0,mm2 movd mm6,DWORD [36+ecx*4+esp] pand mm0,mm7 movd mm1,DWORD [4+ecx*4+ebp] paddq mm3,mm0 movd mm0,DWORD [4+ecx*4+esi] psrlq mm2,32 movd DWORD [28+ecx*4+esp],mm3 psrlq mm3,32 paddq mm2,mm6 dec ebx lea ecx,[1+ecx] jnz NEAR L$004inner mov ebx,ecx pmuludq mm0,mm4 pmuludq mm1,mm5 paddq mm2,mm0 paddq mm3,mm1 movq mm0,mm2 pand mm0,mm7 paddq mm3,mm0 movd DWORD [28+ecx*4+esp],mm3 psrlq mm2,32 psrlq mm3,32 movd mm6,DWORD [36+ebx*4+esp] paddq mm3,mm2 paddq mm3,mm6 movq [32+ebx*4+esp],mm3 lea edx,[1+edx] cmp edx,ebx jle NEAR L$003outer emms jmp NEAR L$005common_tail align 16 L$001non_sse2: mov esi,DWORD [8+esp] lea ebp,[1+ebx] mov edi,DWORD [12+esp] xor ecx,ecx mov edx,esi and ebp,1 sub edx,edi lea eax,[4+ebx*4+edi] or ebp,edx mov edi,DWORD [edi] jz NEAR L$006bn_sqr_mont mov DWORD [28+esp],eax mov eax,DWORD [esi] xor edx,edx align 16 L$007mull: mov ebp,edx mul edi add ebp,eax lea ecx,[1+ecx] adc edx,0 mov eax,DWORD [ecx*4+esi] cmp ecx,ebx mov DWORD [28+ecx*4+esp],ebp jl NEAR L$007mull mov ebp,edx mul edi mov edi,DWORD [20+esp] add eax,ebp mov esi,DWORD [16+esp] adc edx,0 imul edi,DWORD [32+esp] mov DWORD [32+ebx*4+esp],eax xor ecx,ecx mov DWORD [36+ebx*4+esp],edx mov DWORD [40+ebx*4+esp],ecx mov eax,DWORD [esi] mul edi add eax,DWORD [32+esp] mov eax,DWORD [4+esi] adc edx,0 inc ecx jmp NEAR L$0082ndmadd align 16 L$0091stmadd: mov ebp,edx mul edi add ebp,DWORD [32+ecx*4+esp] lea ecx,[1+ecx] adc edx,0 add ebp,eax mov eax,DWORD [ecx*4+esi] adc edx,0 cmp ecx,ebx mov DWORD [28+ecx*4+esp],ebp jl NEAR L$0091stmadd mov ebp,edx mul edi add eax,DWORD [32+ebx*4+esp] mov edi,DWORD [20+esp] adc edx,0 mov esi,DWORD [16+esp] add ebp,eax adc edx,0 imul edi,DWORD [32+esp] xor ecx,ecx add edx,DWORD [36+ebx*4+esp] mov DWORD [32+ebx*4+esp],ebp adc ecx,0 mov eax,DWORD [esi] mov DWORD [36+ebx*4+esp],edx mov DWORD [40+ebx*4+esp],ecx mul edi add eax,DWORD [32+esp] mov eax,DWORD [4+esi] adc edx,0 mov ecx,1 align 16 L$0082ndmadd: mov ebp,edx mul edi add ebp,DWORD [32+ecx*4+esp] lea ecx,[1+ecx] adc edx,0 add ebp,eax mov eax,DWORD [ecx*4+esi] adc edx,0 cmp ecx,ebx mov DWORD [24+ecx*4+esp],ebp jl NEAR L$0082ndmadd mov ebp,edx mul edi add ebp,DWORD [32+ebx*4+esp] adc edx,0 add ebp,eax adc edx,0 mov DWORD [28+ebx*4+esp],ebp xor eax,eax mov ecx,DWORD [12+esp] add edx,DWORD [36+ebx*4+esp] adc eax,DWORD [40+ebx*4+esp] lea ecx,[4+ecx] mov DWORD [32+ebx*4+esp],edx cmp ecx,DWORD [28+esp] mov DWORD [36+ebx*4+esp],eax je NEAR L$005common_tail mov edi,DWORD [ecx] mov esi,DWORD [8+esp] mov DWORD [12+esp],ecx xor ecx,ecx xor edx,edx mov eax,DWORD [esi] jmp NEAR L$0091stmadd align 16 L$006bn_sqr_mont: mov DWORD [esp],ebx mov DWORD [12+esp],ecx mov eax,edi mul edi mov DWORD [32+esp],eax mov ebx,edx shr edx,1 and ebx,1 inc ecx align 16 L$010sqr: mov eax,DWORD [ecx*4+esi] mov ebp,edx mul edi add eax,ebp lea ecx,[1+ecx] adc edx,0 lea ebp,[eax*2+ebx] shr eax,31 cmp ecx,DWORD [esp] mov ebx,eax mov DWORD [28+ecx*4+esp],ebp jl NEAR L$010sqr mov eax,DWORD [ecx*4+esi] mov ebp,edx mul edi add eax,ebp mov edi,DWORD [20+esp] adc edx,0 mov esi,DWORD [16+esp] lea ebp,[eax*2+ebx] imul edi,DWORD [32+esp] shr eax,31 mov DWORD [32+ecx*4+esp],ebp lea ebp,[edx*2+eax] mov eax,DWORD [esi] shr edx,31 mov DWORD [36+ecx*4+esp],ebp mov DWORD [40+ecx*4+esp],edx mul edi add eax,DWORD [32+esp] mov ebx,ecx adc edx,0 mov eax,DWORD [4+esi] mov ecx,1 align 16 L$0113rdmadd: mov ebp,edx mul edi add ebp,DWORD [32+ecx*4+esp] adc edx,0 add ebp,eax mov eax,DWORD [4+ecx*4+esi] adc edx,0 mov DWORD [28+ecx*4+esp],ebp mov ebp,edx mul edi add ebp,DWORD [36+ecx*4+esp] lea ecx,[2+ecx] adc edx,0 add ebp,eax mov eax,DWORD [ecx*4+esi] adc edx,0 cmp ecx,ebx mov DWORD [24+ecx*4+esp],ebp jl NEAR L$0113rdmadd mov ebp,edx mul edi add ebp,DWORD [32+ebx*4+esp] adc edx,0 add ebp,eax adc edx,0 mov DWORD [28+ebx*4+esp],ebp mov ecx,DWORD [12+esp] xor eax,eax mov esi,DWORD [8+esp] add edx,DWORD [36+ebx*4+esp] adc eax,DWORD [40+ebx*4+esp] mov DWORD [32+ebx*4+esp],edx cmp ecx,ebx mov DWORD [36+ebx*4+esp],eax je NEAR L$005common_tail mov edi,DWORD [4+ecx*4+esi] lea ecx,[1+ecx] mov eax,edi mov DWORD [12+esp],ecx mul edi add eax,DWORD [32+ecx*4+esp] adc edx,0 mov DWORD [32+ecx*4+esp],eax xor ebp,ebp cmp ecx,ebx lea ecx,[1+ecx] je NEAR L$012sqrlast mov ebx,edx shr edx,1 and ebx,1 align 16 L$013sqradd: mov eax,DWORD [ecx*4+esi] mov ebp,edx mul edi add eax,ebp lea ebp,[eax*1+eax] adc edx,0 shr eax,31 add ebp,DWORD [32+ecx*4+esp] lea ecx,[1+ecx] adc eax,0 add ebp,ebx adc eax,0 cmp ecx,DWORD [esp] mov DWORD [28+ecx*4+esp],ebp mov ebx,eax jle NEAR L$013sqradd mov ebp,edx add edx,edx shr ebp,31 add edx,ebx adc ebp,0 L$012sqrlast: mov edi,DWORD [20+esp] mov esi,DWORD [16+esp] imul edi,DWORD [32+esp] add edx,DWORD [32+ecx*4+esp] mov eax,DWORD [esi] adc ebp,0 mov DWORD [32+ecx*4+esp],edx mov DWORD [36+ecx*4+esp],ebp mul edi add eax,DWORD [32+esp] lea ebx,[ecx-1] adc edx,0 mov ecx,1 mov eax,DWORD [4+esi] jmp NEAR L$0113rdmadd align 16 L$005common_tail: mov ebp,DWORD [16+esp] mov edi,DWORD [4+esp] lea esi,[32+esp] mov eax,DWORD [esi] mov ecx,ebx xor edx,edx align 16 L$014sub: sbb eax,DWORD [edx*4+ebp] mov DWORD [edx*4+edi],eax dec ecx mov eax,DWORD [4+edx*4+esi] lea edx,[1+edx] jge NEAR L$014sub sbb eax,0 align 16 L$015copy: mov edx,DWORD [ebx*4+esi] mov ebp,DWORD [ebx*4+edi] xor edx,ebp and edx,eax xor edx,ebp mov DWORD [ebx*4+esi],ecx mov DWORD [ebx*4+edi],edx dec ebx jge NEAR L$015copy mov esp,DWORD [24+esp] mov eax,1 L$000just_leave: pop edi pop esi pop ebx pop ebp ret db 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 db 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 db 111,114,103,62,0 segment .bss common _OPENSSL_ia32cap_P 16