author     Adam Langley <agl@google.com>  2015-05-11 17:20:37 -0700
committer  Kenny Root <kroot@google.com>  2015-05-12 23:06:14 +0000
commit     e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5 (patch)
tree       6e43e34595ecf887c26c32b86d8ab097fe8cac64 /win-x86
parent     b3106a0cc1493bbe0505c0ec0ce3da4ca90a29ae (diff)
external/boringssl: bump revision.
This change bumps the BoringSSL revision to the current tip-of-tree.

Change-Id: I91d5bf467e16e8d86cb19a4de873985f524e5faa
Diffstat (limited to 'win-x86')
-rw-r--r--  win-x86/crypto/aes/aes-586.asm     | 3219
-rw-r--r--  win-x86/crypto/aes/aesni-x86.asm   | 2424
-rw-r--r--  win-x86/crypto/aes/vpaes-x86.asm   |  649
-rw-r--r--  win-x86/crypto/bn/bn-586.asm       | 1523
-rw-r--r--  win-x86/crypto/bn/co-586.asm       | 1260
-rw-r--r--  win-x86/crypto/bn/x86-mont.asm     |  469
-rw-r--r--  win-x86/crypto/cpu-x86-asm.asm     |  303
-rw-r--r--  win-x86/crypto/md5/md5-586.asm     |  691
-rw-r--r--  win-x86/crypto/modes/ghash-x86.asm | 1265
-rw-r--r--  win-x86/crypto/rc4/rc4-586.asm     |  382
-rw-r--r--  win-x86/crypto/sha/sha1-586.asm    | 2805
-rw-r--r--  win-x86/crypto/sha/sha256-586.asm  | 4591
-rw-r--r--  win-x86/crypto/sha/sha512-586.asm  | 2843
13 files changed, 22424 insertions, 0 deletions
diff --git a/win-x86/crypto/aes/aes-586.asm b/win-x86/crypto/aes/aes-586.asm
new file mode 100644
index 0000000..42ca026
--- /dev/null
+++ b/win-x86/crypto/aes/aes-586.asm
@@ -0,0 +1,3219 @@
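+; AES for 32-bit x86, Windows (NASM/YASM) flavour.  This file appears to be
+; machine-generated perlasm output (aes-586.pl upstream); changes are normally
+; made to the generating script rather than to this file.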
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+%ifdef __YASM_VERSION_ID__
+%if __YASM_VERSION_ID__ < 01010000h
+%error yasm version 1.1.0 or later needed.
+%endif
+; Yasm automatically includes .00 and complains about redefining it.
+; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
+%else
+$@feat.00 equ 1
+%endif
+section .text code align=64
+%else
+section .text code
+%endif
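+; __x86_AES_encrypt_compact: scalar AES encryption using only the 256-byte
+; S-box that ebp-128 points into.  State arrives in eax/ebx/ecx/edx, the key
+; schedule in edi, with the round count at [edi+240].  MixColumns is computed
+; branchlessly with the 0x80808080/0xfefefefe/0x1b1b1b1b byte masks instead
+; of large lookup tables.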
+align 16
+__x86_AES_encrypt_compact:
+ mov DWORD [20+esp],edi
+ xor eax,DWORD [edi]
+ xor ebx,DWORD [4+edi]
+ xor ecx,DWORD [8+edi]
+ xor edx,DWORD [12+edi]
+ mov esi,DWORD [240+edi]
+ lea esi,[esi*1+esi-2]
+ lea esi,[esi*8+edi]
+ mov DWORD [24+esp],esi
+ mov edi,DWORD [ebp-128]
+ mov esi,DWORD [ebp-96]
+ mov edi,DWORD [ebp-64]
+ mov esi,DWORD [ebp-32]
+ mov edi,DWORD [ebp]
+ mov esi,DWORD [32+ebp]
+ mov edi,DWORD [64+ebp]
+ mov esi,DWORD [96+ebp]
+align 16
+L$000loop:
+ mov esi,eax
+ and esi,255
+ movzx esi,BYTE [esi*1+ebp-128]
+ movzx edi,bh
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,8
+ xor esi,edi
+ mov edi,ecx
+ shr edi,16
+ and edi,255
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,16
+ xor esi,edi
+ mov edi,edx
+ shr edi,24
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,24
+ xor esi,edi
+ mov DWORD [4+esp],esi
+ mov esi,ebx
+ and esi,255
+ shr ebx,16
+ movzx esi,BYTE [esi*1+ebp-128]
+ movzx edi,ch
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,8
+ xor esi,edi
+ mov edi,edx
+ shr edi,16
+ and edi,255
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,16
+ xor esi,edi
+ mov edi,eax
+ shr edi,24
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,24
+ xor esi,edi
+ mov DWORD [8+esp],esi
+ mov esi,ecx
+ and esi,255
+ shr ecx,24
+ movzx esi,BYTE [esi*1+ebp-128]
+ movzx edi,dh
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,8
+ xor esi,edi
+ mov edi,eax
+ shr edi,16
+ and edx,255
+ and edi,255
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,16
+ xor esi,edi
+ movzx edi,bh
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,24
+ xor esi,edi
+ and edx,255
+ movzx edx,BYTE [edx*1+ebp-128]
+ movzx eax,ah
+ movzx eax,BYTE [eax*1+ebp-128]
+ shl eax,8
+ xor edx,eax
+ mov eax,DWORD [4+esp]
+ and ebx,255
+ movzx ebx,BYTE [ebx*1+ebp-128]
+ shl ebx,16
+ xor edx,ebx
+ mov ebx,DWORD [8+esp]
+ movzx ecx,BYTE [ecx*1+ebp-128]
+ shl ecx,24
+ xor edx,ecx
+ mov ecx,esi
+ mov ebp,2155905152
+ and ebp,ecx
+ lea edi,[ecx*1+ecx]
+ mov esi,ebp
+ shr ebp,7
+ and edi,4278124286
+ sub esi,ebp
+ mov ebp,ecx
+ and esi,454761243
+ ror ebp,16
+ xor esi,edi
+ mov edi,ecx
+ xor ecx,esi
+ ror edi,24
+ xor esi,ebp
+ rol ecx,24
+ xor esi,edi
+ mov ebp,2155905152
+ xor ecx,esi
+ and ebp,edx
+ lea edi,[edx*1+edx]
+ mov esi,ebp
+ shr ebp,7
+ and edi,4278124286
+ sub esi,ebp
+ mov ebp,edx
+ and esi,454761243
+ ror ebp,16
+ xor esi,edi
+ mov edi,edx
+ xor edx,esi
+ ror edi,24
+ xor esi,ebp
+ rol edx,24
+ xor esi,edi
+ mov ebp,2155905152
+ xor edx,esi
+ and ebp,eax
+ lea edi,[eax*1+eax]
+ mov esi,ebp
+ shr ebp,7
+ and edi,4278124286
+ sub esi,ebp
+ mov ebp,eax
+ and esi,454761243
+ ror ebp,16
+ xor esi,edi
+ mov edi,eax
+ xor eax,esi
+ ror edi,24
+ xor esi,ebp
+ rol eax,24
+ xor esi,edi
+ mov ebp,2155905152
+ xor eax,esi
+ and ebp,ebx
+ lea edi,[ebx*1+ebx]
+ mov esi,ebp
+ shr ebp,7
+ and edi,4278124286
+ sub esi,ebp
+ mov ebp,ebx
+ and esi,454761243
+ ror ebp,16
+ xor esi,edi
+ mov edi,ebx
+ xor ebx,esi
+ ror edi,24
+ xor esi,ebp
+ rol ebx,24
+ xor esi,edi
+ xor ebx,esi
+ mov edi,DWORD [20+esp]
+ mov ebp,DWORD [28+esp]
+ add edi,16
+ xor eax,DWORD [edi]
+ xor ebx,DWORD [4+edi]
+ xor ecx,DWORD [8+edi]
+ xor edx,DWORD [12+edi]
+ cmp edi,DWORD [24+esp]
+ mov DWORD [20+esp],edi
+ jb NEAR L$000loop
+ mov esi,eax
+ and esi,255
+ movzx esi,BYTE [esi*1+ebp-128]
+ movzx edi,bh
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,8
+ xor esi,edi
+ mov edi,ecx
+ shr edi,16
+ and edi,255
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,16
+ xor esi,edi
+ mov edi,edx
+ shr edi,24
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,24
+ xor esi,edi
+ mov DWORD [4+esp],esi
+ mov esi,ebx
+ and esi,255
+ shr ebx,16
+ movzx esi,BYTE [esi*1+ebp-128]
+ movzx edi,ch
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,8
+ xor esi,edi
+ mov edi,edx
+ shr edi,16
+ and edi,255
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,16
+ xor esi,edi
+ mov edi,eax
+ shr edi,24
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,24
+ xor esi,edi
+ mov DWORD [8+esp],esi
+ mov esi,ecx
+ and esi,255
+ shr ecx,24
+ movzx esi,BYTE [esi*1+ebp-128]
+ movzx edi,dh
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,8
+ xor esi,edi
+ mov edi,eax
+ shr edi,16
+ and edx,255
+ and edi,255
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,16
+ xor esi,edi
+ movzx edi,bh
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,24
+ xor esi,edi
+ mov edi,DWORD [20+esp]
+ and edx,255
+ movzx edx,BYTE [edx*1+ebp-128]
+ movzx eax,ah
+ movzx eax,BYTE [eax*1+ebp-128]
+ shl eax,8
+ xor edx,eax
+ mov eax,DWORD [4+esp]
+ and ebx,255
+ movzx ebx,BYTE [ebx*1+ebp-128]
+ shl ebx,16
+ xor edx,ebx
+ mov ebx,DWORD [8+esp]
+ movzx ecx,BYTE [ecx*1+ebp-128]
+ shl ecx,24
+ xor edx,ecx
+ mov ecx,esi
+ xor eax,DWORD [16+edi]
+ xor ebx,DWORD [20+edi]
+ xor ecx,DWORD [24+edi]
+ xor edx,DWORD [28+edi]
+ ret
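+; __sse_AES_encrypt_compact: MMX/SSE version of the routine above.  The state
+; lives in mm0/mm4, SubBytes still goes through the byte table at [ebp-128],
+; and the xtime step uses pcmpgtb/paddb with the 0x1b1b1b1b mask kept at
+; [esp+8].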
+align 16
+__sse_AES_encrypt_compact:
+ pxor mm0,[edi]
+ pxor mm4,[8+edi]
+ mov esi,DWORD [240+edi]
+ lea esi,[esi*1+esi-2]
+ lea esi,[esi*8+edi]
+ mov DWORD [24+esp],esi
+ mov eax,454761243
+ mov DWORD [8+esp],eax
+ mov DWORD [12+esp],eax
+ mov eax,DWORD [ebp-128]
+ mov ebx,DWORD [ebp-96]
+ mov ecx,DWORD [ebp-64]
+ mov edx,DWORD [ebp-32]
+ mov eax,DWORD [ebp]
+ mov ebx,DWORD [32+ebp]
+ mov ecx,DWORD [64+ebp]
+ mov edx,DWORD [96+ebp]
+align 16
+L$001loop:
+ pshufw mm1,mm0,8
+ pshufw mm5,mm4,13
+ movd eax,mm1
+ movd ebx,mm5
+ mov DWORD [20+esp],edi
+ movzx esi,al
+ movzx edx,ah
+ pshufw mm2,mm0,13
+ movzx ecx,BYTE [esi*1+ebp-128]
+ movzx edi,bl
+ movzx edx,BYTE [edx*1+ebp-128]
+ shr eax,16
+ shl edx,8
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,bh
+ shl esi,16
+ pshufw mm6,mm4,8
+ or ecx,esi
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,ah
+ shl esi,24
+ shr ebx,16
+ or edx,esi
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,bh
+ shl esi,8
+ or ecx,esi
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,al
+ shl esi,24
+ or ecx,esi
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,bl
+ movd eax,mm2
+ movd mm0,ecx
+ movzx ecx,BYTE [edi*1+ebp-128]
+ movzx edi,ah
+ shl ecx,16
+ movd ebx,mm6
+ or ecx,esi
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,bh
+ shl esi,24
+ or ecx,esi
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,bl
+ shl esi,8
+ shr ebx,16
+ or ecx,esi
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,al
+ shr eax,16
+ movd mm1,ecx
+ movzx ecx,BYTE [edi*1+ebp-128]
+ movzx edi,ah
+ shl ecx,16
+ and eax,255
+ or ecx,esi
+ punpckldq mm0,mm1
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,bh
+ shl esi,24
+ and ebx,255
+ movzx eax,BYTE [eax*1+ebp-128]
+ or ecx,esi
+ shl eax,16
+ movzx esi,BYTE [edi*1+ebp-128]
+ or edx,eax
+ shl esi,8
+ movzx ebx,BYTE [ebx*1+ebp-128]
+ or ecx,esi
+ or edx,ebx
+ mov edi,DWORD [20+esp]
+ movd mm4,ecx
+ movd mm5,edx
+ punpckldq mm4,mm5
+ add edi,16
+ cmp edi,DWORD [24+esp]
+ ja NEAR L$002out
+ movq mm2,[8+esp]
+ pxor mm3,mm3
+ pxor mm7,mm7
+ movq mm1,mm0
+ movq mm5,mm4
+ pcmpgtb mm3,mm0
+ pcmpgtb mm7,mm4
+ pand mm3,mm2
+ pand mm7,mm2
+ pshufw mm2,mm0,177
+ pshufw mm6,mm4,177
+ paddb mm0,mm0
+ paddb mm4,mm4
+ pxor mm0,mm3
+ pxor mm4,mm7
+ pshufw mm3,mm2,177
+ pshufw mm7,mm6,177
+ pxor mm1,mm0
+ pxor mm5,mm4
+ pxor mm0,mm2
+ pxor mm4,mm6
+ movq mm2,mm3
+ movq mm6,mm7
+ pslld mm3,8
+ pslld mm7,8
+ psrld mm2,24
+ psrld mm6,24
+ pxor mm0,mm3
+ pxor mm4,mm7
+ pxor mm0,mm2
+ pxor mm4,mm6
+ movq mm3,mm1
+ movq mm7,mm5
+ movq mm2,[edi]
+ movq mm6,[8+edi]
+ psrld mm1,8
+ psrld mm5,8
+ mov eax,DWORD [ebp-128]
+ pslld mm3,24
+ pslld mm7,24
+ mov ebx,DWORD [ebp-64]
+ pxor mm0,mm1
+ pxor mm4,mm5
+ mov ecx,DWORD [ebp]
+ pxor mm0,mm3
+ pxor mm4,mm7
+ mov edx,DWORD [64+ebp]
+ pxor mm0,mm2
+ pxor mm4,mm6
+ jmp NEAR L$001loop
+align 16
+L$002out:
+ pxor mm0,[edi]
+ pxor mm4,[8+edi]
+ ret
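+; __x86_AES_encrypt: full-table scalar encryption.  Round lookups index the
+; 2KB Te table at ebp (entries are duplicated dwords, hence the *8 scaling
+; and the 1/2/3 byte offsets); the final round masks single bytes out of the
+; same entries.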
+align 16
+__x86_AES_encrypt:
+ mov DWORD [20+esp],edi
+ xor eax,DWORD [edi]
+ xor ebx,DWORD [4+edi]
+ xor ecx,DWORD [8+edi]
+ xor edx,DWORD [12+edi]
+ mov esi,DWORD [240+edi]
+ lea esi,[esi*1+esi-2]
+ lea esi,[esi*8+edi]
+ mov DWORD [24+esp],esi
+align 16
+L$003loop:
+ mov esi,eax
+ and esi,255
+ mov esi,DWORD [esi*8+ebp]
+ movzx edi,bh
+ xor esi,DWORD [3+edi*8+ebp]
+ mov edi,ecx
+ shr edi,16
+ and edi,255
+ xor esi,DWORD [2+edi*8+ebp]
+ mov edi,edx
+ shr edi,24
+ xor esi,DWORD [1+edi*8+ebp]
+ mov DWORD [4+esp],esi
+ mov esi,ebx
+ and esi,255
+ shr ebx,16
+ mov esi,DWORD [esi*8+ebp]
+ movzx edi,ch
+ xor esi,DWORD [3+edi*8+ebp]
+ mov edi,edx
+ shr edi,16
+ and edi,255
+ xor esi,DWORD [2+edi*8+ebp]
+ mov edi,eax
+ shr edi,24
+ xor esi,DWORD [1+edi*8+ebp]
+ mov DWORD [8+esp],esi
+ mov esi,ecx
+ and esi,255
+ shr ecx,24
+ mov esi,DWORD [esi*8+ebp]
+ movzx edi,dh
+ xor esi,DWORD [3+edi*8+ebp]
+ mov edi,eax
+ shr edi,16
+ and edx,255
+ and edi,255
+ xor esi,DWORD [2+edi*8+ebp]
+ movzx edi,bh
+ xor esi,DWORD [1+edi*8+ebp]
+ mov edi,DWORD [20+esp]
+ mov edx,DWORD [edx*8+ebp]
+ movzx eax,ah
+ xor edx,DWORD [3+eax*8+ebp]
+ mov eax,DWORD [4+esp]
+ and ebx,255
+ xor edx,DWORD [2+ebx*8+ebp]
+ mov ebx,DWORD [8+esp]
+ xor edx,DWORD [1+ecx*8+ebp]
+ mov ecx,esi
+ add edi,16
+ xor eax,DWORD [edi]
+ xor ebx,DWORD [4+edi]
+ xor ecx,DWORD [8+edi]
+ xor edx,DWORD [12+edi]
+ cmp edi,DWORD [24+esp]
+ mov DWORD [20+esp],edi
+ jb NEAR L$003loop
+ mov esi,eax
+ and esi,255
+ mov esi,DWORD [2+esi*8+ebp]
+ and esi,255
+ movzx edi,bh
+ mov edi,DWORD [edi*8+ebp]
+ and edi,65280
+ xor esi,edi
+ mov edi,ecx
+ shr edi,16
+ and edi,255
+ mov edi,DWORD [edi*8+ebp]
+ and edi,16711680
+ xor esi,edi
+ mov edi,edx
+ shr edi,24
+ mov edi,DWORD [2+edi*8+ebp]
+ and edi,4278190080
+ xor esi,edi
+ mov DWORD [4+esp],esi
+ mov esi,ebx
+ and esi,255
+ shr ebx,16
+ mov esi,DWORD [2+esi*8+ebp]
+ and esi,255
+ movzx edi,ch
+ mov edi,DWORD [edi*8+ebp]
+ and edi,65280
+ xor esi,edi
+ mov edi,edx
+ shr edi,16
+ and edi,255
+ mov edi,DWORD [edi*8+ebp]
+ and edi,16711680
+ xor esi,edi
+ mov edi,eax
+ shr edi,24
+ mov edi,DWORD [2+edi*8+ebp]
+ and edi,4278190080
+ xor esi,edi
+ mov DWORD [8+esp],esi
+ mov esi,ecx
+ and esi,255
+ shr ecx,24
+ mov esi,DWORD [2+esi*8+ebp]
+ and esi,255
+ movzx edi,dh
+ mov edi,DWORD [edi*8+ebp]
+ and edi,65280
+ xor esi,edi
+ mov edi,eax
+ shr edi,16
+ and edx,255
+ and edi,255
+ mov edi,DWORD [edi*8+ebp]
+ and edi,16711680
+ xor esi,edi
+ movzx edi,bh
+ mov edi,DWORD [2+edi*8+ebp]
+ and edi,4278190080
+ xor esi,edi
+ mov edi,DWORD [20+esp]
+ and edx,255
+ mov edx,DWORD [2+edx*8+ebp]
+ and edx,255
+ movzx eax,ah
+ mov eax,DWORD [eax*8+ebp]
+ and eax,65280
+ xor edx,eax
+ mov eax,DWORD [4+esp]
+ and ebx,255
+ mov ebx,DWORD [ebx*8+ebp]
+ and ebx,16711680
+ xor edx,ebx
+ mov ebx,DWORD [8+esp]
+ mov ecx,DWORD [2+ecx*8+ebp]
+ and ecx,4278190080
+ xor edx,ecx
+ mov ecx,esi
+ add edi,16
+ xor eax,DWORD [edi]
+ xor ebx,DWORD [4+edi]
+ xor ecx,DWORD [8+edi]
+ xor edx,DWORD [12+edi]
+ ret
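+; L$AES_Te: encryption T-table -- 256 duplicated 32-bit entries (2KB),
+; followed by four 256-byte copies of the S-box and the AES round constants.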
+align 64
+L$AES_Te:
+dd 2774754246,2774754246
+dd 2222750968,2222750968
+dd 2574743534,2574743534
+dd 2373680118,2373680118
+dd 234025727,234025727
+dd 3177933782,3177933782
+dd 2976870366,2976870366
+dd 1422247313,1422247313
+dd 1345335392,1345335392
+dd 50397442,50397442
+dd 2842126286,2842126286
+dd 2099981142,2099981142
+dd 436141799,436141799
+dd 1658312629,1658312629
+dd 3870010189,3870010189
+dd 2591454956,2591454956
+dd 1170918031,1170918031
+dd 2642575903,2642575903
+dd 1086966153,1086966153
+dd 2273148410,2273148410
+dd 368769775,368769775
+dd 3948501426,3948501426
+dd 3376891790,3376891790
+dd 200339707,200339707
+dd 3970805057,3970805057
+dd 1742001331,1742001331
+dd 4255294047,4255294047
+dd 3937382213,3937382213
+dd 3214711843,3214711843
+dd 4154762323,4154762323
+dd 2524082916,2524082916
+dd 1539358875,1539358875
+dd 3266819957,3266819957
+dd 486407649,486407649
+dd 2928907069,2928907069
+dd 1780885068,1780885068
+dd 1513502316,1513502316
+dd 1094664062,1094664062
+dd 49805301,49805301
+dd 1338821763,1338821763
+dd 1546925160,1546925160
+dd 4104496465,4104496465
+dd 887481809,887481809
+dd 150073849,150073849
+dd 2473685474,2473685474
+dd 1943591083,1943591083
+dd 1395732834,1395732834
+dd 1058346282,1058346282
+dd 201589768,201589768
+dd 1388824469,1388824469
+dd 1696801606,1696801606
+dd 1589887901,1589887901
+dd 672667696,672667696
+dd 2711000631,2711000631
+dd 251987210,251987210
+dd 3046808111,3046808111
+dd 151455502,151455502
+dd 907153956,907153956
+dd 2608889883,2608889883
+dd 1038279391,1038279391
+dd 652995533,652995533
+dd 1764173646,1764173646
+dd 3451040383,3451040383
+dd 2675275242,2675275242
+dd 453576978,453576978
+dd 2659418909,2659418909
+dd 1949051992,1949051992
+dd 773462580,773462580
+dd 756751158,756751158
+dd 2993581788,2993581788
+dd 3998898868,3998898868
+dd 4221608027,4221608027
+dd 4132590244,4132590244
+dd 1295727478,1295727478
+dd 1641469623,1641469623
+dd 3467883389,3467883389
+dd 2066295122,2066295122
+dd 1055122397,1055122397
+dd 1898917726,1898917726
+dd 2542044179,2542044179
+dd 4115878822,4115878822
+dd 1758581177,1758581177
+dd 0,0
+dd 753790401,753790401
+dd 1612718144,1612718144
+dd 536673507,536673507
+dd 3367088505,3367088505
+dd 3982187446,3982187446
+dd 3194645204,3194645204
+dd 1187761037,1187761037
+dd 3653156455,3653156455
+dd 1262041458,1262041458
+dd 3729410708,3729410708
+dd 3561770136,3561770136
+dd 3898103984,3898103984
+dd 1255133061,1255133061
+dd 1808847035,1808847035
+dd 720367557,720367557
+dd 3853167183,3853167183
+dd 385612781,385612781
+dd 3309519750,3309519750
+dd 3612167578,3612167578
+dd 1429418854,1429418854
+dd 2491778321,2491778321
+dd 3477423498,3477423498
+dd 284817897,284817897
+dd 100794884,100794884
+dd 2172616702,2172616702
+dd 4031795360,4031795360
+dd 1144798328,1144798328
+dd 3131023141,3131023141
+dd 3819481163,3819481163
+dd 4082192802,4082192802
+dd 4272137053,4272137053
+dd 3225436288,3225436288
+dd 2324664069,2324664069
+dd 2912064063,2912064063
+dd 3164445985,3164445985
+dd 1211644016,1211644016
+dd 83228145,83228145
+dd 3753688163,3753688163
+dd 3249976951,3249976951
+dd 1977277103,1977277103
+dd 1663115586,1663115586
+dd 806359072,806359072
+dd 452984805,452984805
+dd 250868733,250868733
+dd 1842533055,1842533055
+dd 1288555905,1288555905
+dd 336333848,336333848
+dd 890442534,890442534
+dd 804056259,804056259
+dd 3781124030,3781124030
+dd 2727843637,2727843637
+dd 3427026056,3427026056
+dd 957814574,957814574
+dd 1472513171,1472513171
+dd 4071073621,4071073621
+dd 2189328124,2189328124
+dd 1195195770,1195195770
+dd 2892260552,2892260552
+dd 3881655738,3881655738
+dd 723065138,723065138
+dd 2507371494,2507371494
+dd 2690670784,2690670784
+dd 2558624025,2558624025
+dd 3511635870,3511635870
+dd 2145180835,2145180835
+dd 1713513028,1713513028
+dd 2116692564,2116692564
+dd 2878378043,2878378043
+dd 2206763019,2206763019
+dd 3393603212,3393603212
+dd 703524551,703524551
+dd 3552098411,3552098411
+dd 1007948840,1007948840
+dd 2044649127,2044649127
+dd 3797835452,3797835452
+dd 487262998,487262998
+dd 1994120109,1994120109
+dd 1004593371,1004593371
+dd 1446130276,1446130276
+dd 1312438900,1312438900
+dd 503974420,503974420
+dd 3679013266,3679013266
+dd 168166924,168166924
+dd 1814307912,1814307912
+dd 3831258296,3831258296
+dd 1573044895,1573044895
+dd 1859376061,1859376061
+dd 4021070915,4021070915
+dd 2791465668,2791465668
+dd 2828112185,2828112185
+dd 2761266481,2761266481
+dd 937747667,937747667
+dd 2339994098,2339994098
+dd 854058965,854058965
+dd 1137232011,1137232011
+dd 1496790894,1496790894
+dd 3077402074,3077402074
+dd 2358086913,2358086913
+dd 1691735473,1691735473
+dd 3528347292,3528347292
+dd 3769215305,3769215305
+dd 3027004632,3027004632
+dd 4199962284,4199962284
+dd 133494003,133494003
+dd 636152527,636152527
+dd 2942657994,2942657994
+dd 2390391540,2390391540
+dd 3920539207,3920539207
+dd 403179536,403179536
+dd 3585784431,3585784431
+dd 2289596656,2289596656
+dd 1864705354,1864705354
+dd 1915629148,1915629148
+dd 605822008,605822008
+dd 4054230615,4054230615
+dd 3350508659,3350508659
+dd 1371981463,1371981463
+dd 602466507,602466507
+dd 2094914977,2094914977
+dd 2624877800,2624877800
+dd 555687742,555687742
+dd 3712699286,3712699286
+dd 3703422305,3703422305
+dd 2257292045,2257292045
+dd 2240449039,2240449039
+dd 2423288032,2423288032
+dd 1111375484,1111375484
+dd 3300242801,3300242801
+dd 2858837708,2858837708
+dd 3628615824,3628615824
+dd 84083462,84083462
+dd 32962295,32962295
+dd 302911004,302911004
+dd 2741068226,2741068226
+dd 1597322602,1597322602
+dd 4183250862,4183250862
+dd 3501832553,3501832553
+dd 2441512471,2441512471
+dd 1489093017,1489093017
+dd 656219450,656219450
+dd 3114180135,3114180135
+dd 954327513,954327513
+dd 335083755,335083755
+dd 3013122091,3013122091
+dd 856756514,856756514
+dd 3144247762,3144247762
+dd 1893325225,1893325225
+dd 2307821063,2307821063
+dd 2811532339,2811532339
+dd 3063651117,3063651117
+dd 572399164,572399164
+dd 2458355477,2458355477
+dd 552200649,552200649
+dd 1238290055,1238290055
+dd 4283782570,4283782570
+dd 2015897680,2015897680
+dd 2061492133,2061492133
+dd 2408352771,2408352771
+dd 4171342169,4171342169
+dd 2156497161,2156497161
+dd 386731290,386731290
+dd 3669999461,3669999461
+dd 837215959,837215959
+dd 3326231172,3326231172
+dd 3093850320,3093850320
+dd 3275833730,3275833730
+dd 2962856233,2962856233
+dd 1999449434,1999449434
+dd 286199582,286199582
+dd 3417354363,3417354363
+dd 4233385128,4233385128
+dd 3602627437,3602627437
+dd 974525996,974525996
+db 99,124,119,123,242,107,111,197
+db 48,1,103,43,254,215,171,118
+db 202,130,201,125,250,89,71,240
+db 173,212,162,175,156,164,114,192
+db 183,253,147,38,54,63,247,204
+db 52,165,229,241,113,216,49,21
+db 4,199,35,195,24,150,5,154
+db 7,18,128,226,235,39,178,117
+db 9,131,44,26,27,110,90,160
+db 82,59,214,179,41,227,47,132
+db 83,209,0,237,32,252,177,91
+db 106,203,190,57,74,76,88,207
+db 208,239,170,251,67,77,51,133
+db 69,249,2,127,80,60,159,168
+db 81,163,64,143,146,157,56,245
+db 188,182,218,33,16,255,243,210
+db 205,12,19,236,95,151,68,23
+db 196,167,126,61,100,93,25,115
+db 96,129,79,220,34,42,144,136
+db 70,238,184,20,222,94,11,219
+db 224,50,58,10,73,6,36,92
+db 194,211,172,98,145,149,228,121
+db 231,200,55,109,141,213,78,169
+db 108,86,244,234,101,122,174,8
+db 186,120,37,46,28,166,180,198
+db 232,221,116,31,75,189,139,138
+db 112,62,181,102,72,3,246,14
+db 97,53,87,185,134,193,29,158
+db 225,248,152,17,105,217,142,148
+db 155,30,135,233,206,85,40,223
+db 140,161,137,13,191,230,66,104
+db 65,153,45,15,176,84,187,22
+db 99,124,119,123,242,107,111,197
+db 48,1,103,43,254,215,171,118
+db 202,130,201,125,250,89,71,240
+db 173,212,162,175,156,164,114,192
+db 183,253,147,38,54,63,247,204
+db 52,165,229,241,113,216,49,21
+db 4,199,35,195,24,150,5,154
+db 7,18,128,226,235,39,178,117
+db 9,131,44,26,27,110,90,160
+db 82,59,214,179,41,227,47,132
+db 83,209,0,237,32,252,177,91
+db 106,203,190,57,74,76,88,207
+db 208,239,170,251,67,77,51,133
+db 69,249,2,127,80,60,159,168
+db 81,163,64,143,146,157,56,245
+db 188,182,218,33,16,255,243,210
+db 205,12,19,236,95,151,68,23
+db 196,167,126,61,100,93,25,115
+db 96,129,79,220,34,42,144,136
+db 70,238,184,20,222,94,11,219
+db 224,50,58,10,73,6,36,92
+db 194,211,172,98,145,149,228,121
+db 231,200,55,109,141,213,78,169
+db 108,86,244,234,101,122,174,8
+db 186,120,37,46,28,166,180,198
+db 232,221,116,31,75,189,139,138
+db 112,62,181,102,72,3,246,14
+db 97,53,87,185,134,193,29,158
+db 225,248,152,17,105,217,142,148
+db 155,30,135,233,206,85,40,223
+db 140,161,137,13,191,230,66,104
+db 65,153,45,15,176,84,187,22
+db 99,124,119,123,242,107,111,197
+db 48,1,103,43,254,215,171,118
+db 202,130,201,125,250,89,71,240
+db 173,212,162,175,156,164,114,192
+db 183,253,147,38,54,63,247,204
+db 52,165,229,241,113,216,49,21
+db 4,199,35,195,24,150,5,154
+db 7,18,128,226,235,39,178,117
+db 9,131,44,26,27,110,90,160
+db 82,59,214,179,41,227,47,132
+db 83,209,0,237,32,252,177,91
+db 106,203,190,57,74,76,88,207
+db 208,239,170,251,67,77,51,133
+db 69,249,2,127,80,60,159,168
+db 81,163,64,143,146,157,56,245
+db 188,182,218,33,16,255,243,210
+db 205,12,19,236,95,151,68,23
+db 196,167,126,61,100,93,25,115
+db 96,129,79,220,34,42,144,136
+db 70,238,184,20,222,94,11,219
+db 224,50,58,10,73,6,36,92
+db 194,211,172,98,145,149,228,121
+db 231,200,55,109,141,213,78,169
+db 108,86,244,234,101,122,174,8
+db 186,120,37,46,28,166,180,198
+db 232,221,116,31,75,189,139,138
+db 112,62,181,102,72,3,246,14
+db 97,53,87,185,134,193,29,158
+db 225,248,152,17,105,217,142,148
+db 155,30,135,233,206,85,40,223
+db 140,161,137,13,191,230,66,104
+db 65,153,45,15,176,84,187,22
+db 99,124,119,123,242,107,111,197
+db 48,1,103,43,254,215,171,118
+db 202,130,201,125,250,89,71,240
+db 173,212,162,175,156,164,114,192
+db 183,253,147,38,54,63,247,204
+db 52,165,229,241,113,216,49,21
+db 4,199,35,195,24,150,5,154
+db 7,18,128,226,235,39,178,117
+db 9,131,44,26,27,110,90,160
+db 82,59,214,179,41,227,47,132
+db 83,209,0,237,32,252,177,91
+db 106,203,190,57,74,76,88,207
+db 208,239,170,251,67,77,51,133
+db 69,249,2,127,80,60,159,168
+db 81,163,64,143,146,157,56,245
+db 188,182,218,33,16,255,243,210
+db 205,12,19,236,95,151,68,23
+db 196,167,126,61,100,93,25,115
+db 96,129,79,220,34,42,144,136
+db 70,238,184,20,222,94,11,219
+db 224,50,58,10,73,6,36,92
+db 194,211,172,98,145,149,228,121
+db 231,200,55,109,141,213,78,169
+db 108,86,244,234,101,122,174,8
+db 186,120,37,46,28,166,180,198
+db 232,221,116,31,75,189,139,138
+db 112,62,181,102,72,3,246,14
+db 97,53,87,185,134,193,29,158
+db 225,248,152,17,105,217,142,148
+db 155,30,135,233,206,85,40,223
+db 140,161,137,13,191,230,66,104
+db 65,153,45,15,176,84,187,22
+dd 1,2,4,8
+dd 16,32,64,128
+dd 27,54,0,0
+dd 0,0,0,0
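+; asm_AES_encrypt(in, out, key): public entry point.  After the register
+; saves the three arguments are read from [esp+20], [esp+24] and [esp+28].
+; The code realigns the stack, picks a table base relative to the stack
+; address, and dispatches to the SSE or scalar compact routine depending on
+; bit 25 (SSE) of OPENSSL_ia32cap_P.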
+global _asm_AES_encrypt
+align 16
+_asm_AES_encrypt:
+L$_asm_AES_encrypt_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov esi,DWORD [20+esp]
+ mov edi,DWORD [28+esp]
+ mov eax,esp
+ sub esp,36
+ and esp,-64
+ lea ebx,[edi-127]
+ sub ebx,esp
+ neg ebx
+ and ebx,960
+ sub esp,ebx
+ add esp,4
+ mov DWORD [28+esp],eax
+ call L$004pic_point
+L$004pic_point:
+ pop ebp
+ lea eax,[_OPENSSL_ia32cap_P]
+ lea ebp,[(L$AES_Te-L$004pic_point)+ebp]
+ lea ebx,[764+esp]
+ sub ebx,ebp
+ and ebx,768
+ lea ebp,[2176+ebx*1+ebp]
+ bt DWORD [eax],25
+ jnc NEAR L$005x86
+ movq mm0,[esi]
+ movq mm4,[8+esi]
+ call __sse_AES_encrypt_compact
+ mov esp,DWORD [28+esp]
+ mov esi,DWORD [24+esp]
+ movq [esi],mm0
+ movq [8+esi],mm4
+ emms
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+align 16
+L$005x86:
+ mov DWORD [24+esp],ebp
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [12+esi]
+ call __x86_AES_encrypt_compact
+ mov esp,DWORD [28+esp]
+ mov esi,DWORD [24+esp]
+ mov DWORD [esi],eax
+ mov DWORD [4+esi],ebx
+ mov DWORD [8+esi],ecx
+ mov DWORD [12+esi],edx
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
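+; The decryption side mirrors the encryption routines above: compact scalar,
+; compact SSE and full-table variants, driven by the inverse S-box and the
+; Td table (L$AES_Td below).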
+align 16
+__x86_AES_decrypt_compact:
+ mov DWORD [20+esp],edi
+ xor eax,DWORD [edi]
+ xor ebx,DWORD [4+edi]
+ xor ecx,DWORD [8+edi]
+ xor edx,DWORD [12+edi]
+ mov esi,DWORD [240+edi]
+ lea esi,[esi*1+esi-2]
+ lea esi,[esi*8+edi]
+ mov DWORD [24+esp],esi
+ mov edi,DWORD [ebp-128]
+ mov esi,DWORD [ebp-96]
+ mov edi,DWORD [ebp-64]
+ mov esi,DWORD [ebp-32]
+ mov edi,DWORD [ebp]
+ mov esi,DWORD [32+ebp]
+ mov edi,DWORD [64+ebp]
+ mov esi,DWORD [96+ebp]
+align 16
+L$006loop:
+ mov esi,eax
+ and esi,255
+ movzx esi,BYTE [esi*1+ebp-128]
+ movzx edi,dh
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,8
+ xor esi,edi
+ mov edi,ecx
+ shr edi,16
+ and edi,255
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,16
+ xor esi,edi
+ mov edi,ebx
+ shr edi,24
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,24
+ xor esi,edi
+ mov DWORD [4+esp],esi
+ mov esi,ebx
+ and esi,255
+ movzx esi,BYTE [esi*1+ebp-128]
+ movzx edi,ah
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,8
+ xor esi,edi
+ mov edi,edx
+ shr edi,16
+ and edi,255
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,16
+ xor esi,edi
+ mov edi,ecx
+ shr edi,24
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,24
+ xor esi,edi
+ mov DWORD [8+esp],esi
+ mov esi,ecx
+ and esi,255
+ movzx esi,BYTE [esi*1+ebp-128]
+ movzx edi,bh
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,8
+ xor esi,edi
+ mov edi,eax
+ shr edi,16
+ and edi,255
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,16
+ xor esi,edi
+ mov edi,edx
+ shr edi,24
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,24
+ xor esi,edi
+ and edx,255
+ movzx edx,BYTE [edx*1+ebp-128]
+ movzx ecx,ch
+ movzx ecx,BYTE [ecx*1+ebp-128]
+ shl ecx,8
+ xor edx,ecx
+ mov ecx,esi
+ shr ebx,16
+ and ebx,255
+ movzx ebx,BYTE [ebx*1+ebp-128]
+ shl ebx,16
+ xor edx,ebx
+ shr eax,24
+ movzx eax,BYTE [eax*1+ebp-128]
+ shl eax,24
+ xor edx,eax
+ mov edi,2155905152
+ and edi,ecx
+ mov esi,edi
+ shr edi,7
+ lea eax,[ecx*1+ecx]
+ sub esi,edi
+ and eax,4278124286
+ and esi,454761243
+ xor eax,esi
+ mov edi,2155905152
+ and edi,eax
+ mov esi,edi
+ shr edi,7
+ lea ebx,[eax*1+eax]
+ sub esi,edi
+ and ebx,4278124286
+ and esi,454761243
+ xor eax,ecx
+ xor ebx,esi
+ mov edi,2155905152
+ and edi,ebx
+ mov esi,edi
+ shr edi,7
+ lea ebp,[ebx*1+ebx]
+ sub esi,edi
+ and ebp,4278124286
+ and esi,454761243
+ xor ebx,ecx
+ rol ecx,8
+ xor ebp,esi
+ xor ecx,eax
+ xor eax,ebp
+ xor ecx,ebx
+ xor ebx,ebp
+ rol eax,24
+ xor ecx,ebp
+ rol ebx,16
+ xor ecx,eax
+ rol ebp,8
+ xor ecx,ebx
+ mov eax,DWORD [4+esp]
+ xor ecx,ebp
+ mov DWORD [12+esp],ecx
+ mov edi,2155905152
+ and edi,edx
+ mov esi,edi
+ shr edi,7
+ lea ebx,[edx*1+edx]
+ sub esi,edi
+ and ebx,4278124286
+ and esi,454761243
+ xor ebx,esi
+ mov edi,2155905152
+ and edi,ebx
+ mov esi,edi
+ shr edi,7
+ lea ecx,[ebx*1+ebx]
+ sub esi,edi
+ and ecx,4278124286
+ and esi,454761243
+ xor ebx,edx
+ xor ecx,esi
+ mov edi,2155905152
+ and edi,ecx
+ mov esi,edi
+ shr edi,7
+ lea ebp,[ecx*1+ecx]
+ sub esi,edi
+ and ebp,4278124286
+ and esi,454761243
+ xor ecx,edx
+ rol edx,8
+ xor ebp,esi
+ xor edx,ebx
+ xor ebx,ebp
+ xor edx,ecx
+ xor ecx,ebp
+ rol ebx,24
+ xor edx,ebp
+ rol ecx,16
+ xor edx,ebx
+ rol ebp,8
+ xor edx,ecx
+ mov ebx,DWORD [8+esp]
+ xor edx,ebp
+ mov DWORD [16+esp],edx
+ mov edi,2155905152
+ and edi,eax
+ mov esi,edi
+ shr edi,7
+ lea ecx,[eax*1+eax]
+ sub esi,edi
+ and ecx,4278124286
+ and esi,454761243
+ xor ecx,esi
+ mov edi,2155905152
+ and edi,ecx
+ mov esi,edi
+ shr edi,7
+ lea edx,[ecx*1+ecx]
+ sub esi,edi
+ and edx,4278124286
+ and esi,454761243
+ xor ecx,eax
+ xor edx,esi
+ mov edi,2155905152
+ and edi,edx
+ mov esi,edi
+ shr edi,7
+ lea ebp,[edx*1+edx]
+ sub esi,edi
+ and ebp,4278124286
+ and esi,454761243
+ xor edx,eax
+ rol eax,8
+ xor ebp,esi
+ xor eax,ecx
+ xor ecx,ebp
+ xor eax,edx
+ xor edx,ebp
+ rol ecx,24
+ xor eax,ebp
+ rol edx,16
+ xor eax,ecx
+ rol ebp,8
+ xor eax,edx
+ xor eax,ebp
+ mov edi,2155905152
+ and edi,ebx
+ mov esi,edi
+ shr edi,7
+ lea ecx,[ebx*1+ebx]
+ sub esi,edi
+ and ecx,4278124286
+ and esi,454761243
+ xor ecx,esi
+ mov edi,2155905152
+ and edi,ecx
+ mov esi,edi
+ shr edi,7
+ lea edx,[ecx*1+ecx]
+ sub esi,edi
+ and edx,4278124286
+ and esi,454761243
+ xor ecx,ebx
+ xor edx,esi
+ mov edi,2155905152
+ and edi,edx
+ mov esi,edi
+ shr edi,7
+ lea ebp,[edx*1+edx]
+ sub esi,edi
+ and ebp,4278124286
+ and esi,454761243
+ xor edx,ebx
+ rol ebx,8
+ xor ebp,esi
+ xor ebx,ecx
+ xor ecx,ebp
+ xor ebx,edx
+ xor edx,ebp
+ rol ecx,24
+ xor ebx,ebp
+ rol edx,16
+ xor ebx,ecx
+ rol ebp,8
+ xor ebx,edx
+ mov ecx,DWORD [12+esp]
+ xor ebx,ebp
+ mov edx,DWORD [16+esp]
+ mov edi,DWORD [20+esp]
+ mov ebp,DWORD [28+esp]
+ add edi,16
+ xor eax,DWORD [edi]
+ xor ebx,DWORD [4+edi]
+ xor ecx,DWORD [8+edi]
+ xor edx,DWORD [12+edi]
+ cmp edi,DWORD [24+esp]
+ mov DWORD [20+esp],edi
+ jb NEAR L$006loop
+ mov esi,eax
+ and esi,255
+ movzx esi,BYTE [esi*1+ebp-128]
+ movzx edi,dh
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,8
+ xor esi,edi
+ mov edi,ecx
+ shr edi,16
+ and edi,255
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,16
+ xor esi,edi
+ mov edi,ebx
+ shr edi,24
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,24
+ xor esi,edi
+ mov DWORD [4+esp],esi
+ mov esi,ebx
+ and esi,255
+ movzx esi,BYTE [esi*1+ebp-128]
+ movzx edi,ah
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,8
+ xor esi,edi
+ mov edi,edx
+ shr edi,16
+ and edi,255
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,16
+ xor esi,edi
+ mov edi,ecx
+ shr edi,24
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,24
+ xor esi,edi
+ mov DWORD [8+esp],esi
+ mov esi,ecx
+ and esi,255
+ movzx esi,BYTE [esi*1+ebp-128]
+ movzx edi,bh
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,8
+ xor esi,edi
+ mov edi,eax
+ shr edi,16
+ and edi,255
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,16
+ xor esi,edi
+ mov edi,edx
+ shr edi,24
+ movzx edi,BYTE [edi*1+ebp-128]
+ shl edi,24
+ xor esi,edi
+ mov edi,DWORD [20+esp]
+ and edx,255
+ movzx edx,BYTE [edx*1+ebp-128]
+ movzx ecx,ch
+ movzx ecx,BYTE [ecx*1+ebp-128]
+ shl ecx,8
+ xor edx,ecx
+ mov ecx,esi
+ shr ebx,16
+ and ebx,255
+ movzx ebx,BYTE [ebx*1+ebp-128]
+ shl ebx,16
+ xor edx,ebx
+ mov ebx,DWORD [8+esp]
+ shr eax,24
+ movzx eax,BYTE [eax*1+ebp-128]
+ shl eax,24
+ xor edx,eax
+ mov eax,DWORD [4+esp]
+ xor eax,DWORD [16+edi]
+ xor ebx,DWORD [20+edi]
+ xor ecx,DWORD [24+edi]
+ xor edx,DWORD [28+edi]
+ ret
+align 16
+__sse_AES_decrypt_compact:
+ pxor mm0,[edi]
+ pxor mm4,[8+edi]
+ mov esi,DWORD [240+edi]
+ lea esi,[esi*1+esi-2]
+ lea esi,[esi*8+edi]
+ mov DWORD [24+esp],esi
+ mov eax,454761243
+ mov DWORD [8+esp],eax
+ mov DWORD [12+esp],eax
+ mov eax,DWORD [ebp-128]
+ mov ebx,DWORD [ebp-96]
+ mov ecx,DWORD [ebp-64]
+ mov edx,DWORD [ebp-32]
+ mov eax,DWORD [ebp]
+ mov ebx,DWORD [32+ebp]
+ mov ecx,DWORD [64+ebp]
+ mov edx,DWORD [96+ebp]
+align 16
+L$007loop:
+ pshufw mm1,mm0,12
+ pshufw mm5,mm4,9
+ movd eax,mm1
+ movd ebx,mm5
+ mov DWORD [20+esp],edi
+ movzx esi,al
+ movzx edx,ah
+ pshufw mm2,mm0,6
+ movzx ecx,BYTE [esi*1+ebp-128]
+ movzx edi,bl
+ movzx edx,BYTE [edx*1+ebp-128]
+ shr eax,16
+ shl edx,8
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,bh
+ shl esi,16
+ pshufw mm6,mm4,3
+ or ecx,esi
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,ah
+ shl esi,24
+ shr ebx,16
+ or edx,esi
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,bh
+ shl esi,24
+ or ecx,esi
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,al
+ shl esi,8
+ movd eax,mm2
+ or ecx,esi
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,bl
+ shl esi,16
+ movd ebx,mm6
+ movd mm0,ecx
+ movzx ecx,BYTE [edi*1+ebp-128]
+ movzx edi,al
+ or ecx,esi
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,bl
+ or edx,esi
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,ah
+ shl esi,16
+ shr eax,16
+ or edx,esi
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,bh
+ shr ebx,16
+ shl esi,8
+ movd mm1,edx
+ movzx edx,BYTE [edi*1+ebp-128]
+ movzx edi,bh
+ shl edx,24
+ and ebx,255
+ or edx,esi
+ punpckldq mm0,mm1
+ movzx esi,BYTE [edi*1+ebp-128]
+ movzx edi,al
+ shl esi,8
+ movzx eax,ah
+ movzx ebx,BYTE [ebx*1+ebp-128]
+ or ecx,esi
+ movzx esi,BYTE [edi*1+ebp-128]
+ or edx,ebx
+ shl esi,16
+ movzx eax,BYTE [eax*1+ebp-128]
+ or edx,esi
+ shl eax,24
+ or ecx,eax
+ mov edi,DWORD [20+esp]
+ movd mm4,edx
+ movd mm5,ecx
+ punpckldq mm4,mm5
+ add edi,16
+ cmp edi,DWORD [24+esp]
+ ja NEAR L$008out
+ movq mm3,mm0
+ movq mm7,mm4
+ pshufw mm2,mm0,228
+ pshufw mm6,mm4,228
+ movq mm1,mm0
+ movq mm5,mm4
+ pshufw mm0,mm0,177
+ pshufw mm4,mm4,177
+ pslld mm2,8
+ pslld mm6,8
+ psrld mm3,8
+ psrld mm7,8
+ pxor mm0,mm2
+ pxor mm4,mm6
+ pxor mm0,mm3
+ pxor mm4,mm7
+ pslld mm2,16
+ pslld mm6,16
+ psrld mm3,16
+ psrld mm7,16
+ pxor mm0,mm2
+ pxor mm4,mm6
+ pxor mm0,mm3
+ pxor mm4,mm7
+ movq mm3,[8+esp]
+ pxor mm2,mm2
+ pxor mm6,mm6
+ pcmpgtb mm2,mm1
+ pcmpgtb mm6,mm5
+ pand mm2,mm3
+ pand mm6,mm3
+ paddb mm1,mm1
+ paddb mm5,mm5
+ pxor mm1,mm2
+ pxor mm5,mm6
+ movq mm3,mm1
+ movq mm7,mm5
+ movq mm2,mm1
+ movq mm6,mm5
+ pxor mm0,mm1
+ pxor mm4,mm5
+ pslld mm3,24
+ pslld mm7,24
+ psrld mm2,8
+ psrld mm6,8
+ pxor mm0,mm3
+ pxor mm4,mm7
+ pxor mm0,mm2
+ pxor mm4,mm6
+ movq mm2,[8+esp]
+ pxor mm3,mm3
+ pxor mm7,mm7
+ pcmpgtb mm3,mm1
+ pcmpgtb mm7,mm5
+ pand mm3,mm2
+ pand mm7,mm2
+ paddb mm1,mm1
+ paddb mm5,mm5
+ pxor mm1,mm3
+ pxor mm5,mm7
+ pshufw mm3,mm1,177
+ pshufw mm7,mm5,177
+ pxor mm0,mm1
+ pxor mm4,mm5
+ pxor mm0,mm3
+ pxor mm4,mm7
+ pxor mm3,mm3
+ pxor mm7,mm7
+ pcmpgtb mm3,mm1
+ pcmpgtb mm7,mm5
+ pand mm3,mm2
+ pand mm7,mm2
+ paddb mm1,mm1
+ paddb mm5,mm5
+ pxor mm1,mm3
+ pxor mm5,mm7
+ pxor mm0,mm1
+ pxor mm4,mm5
+ movq mm3,mm1
+ movq mm7,mm5
+ pshufw mm2,mm1,177
+ pshufw mm6,mm5,177
+ pxor mm0,mm2
+ pxor mm4,mm6
+ pslld mm1,8
+ pslld mm5,8
+ psrld mm3,8
+ psrld mm7,8
+ movq mm2,[edi]
+ movq mm6,[8+edi]
+ pxor mm0,mm1
+ pxor mm4,mm5
+ pxor mm0,mm3
+ pxor mm4,mm7
+ mov eax,DWORD [ebp-128]
+ pslld mm1,16
+ pslld mm5,16
+ mov ebx,DWORD [ebp-64]
+ psrld mm3,16
+ psrld mm7,16
+ mov ecx,DWORD [ebp]
+ pxor mm0,mm1
+ pxor mm4,mm5
+ mov edx,DWORD [64+ebp]
+ pxor mm0,mm3
+ pxor mm4,mm7
+ pxor mm0,mm2
+ pxor mm4,mm6
+ jmp NEAR L$007loop
+align 16
+L$008out:
+ pxor mm0,[edi]
+ pxor mm4,[8+edi]
+ ret
+align 16
+__x86_AES_decrypt:
+ mov DWORD [20+esp],edi
+ xor eax,DWORD [edi]
+ xor ebx,DWORD [4+edi]
+ xor ecx,DWORD [8+edi]
+ xor edx,DWORD [12+edi]
+ mov esi,DWORD [240+edi]
+ lea esi,[esi*1+esi-2]
+ lea esi,[esi*8+edi]
+ mov DWORD [24+esp],esi
+align 16
+L$009loop:
+ mov esi,eax
+ and esi,255
+ mov esi,DWORD [esi*8+ebp]
+ movzx edi,dh
+ xor esi,DWORD [3+edi*8+ebp]
+ mov edi,ecx
+ shr edi,16
+ and edi,255
+ xor esi,DWORD [2+edi*8+ebp]
+ mov edi,ebx
+ shr edi,24
+ xor esi,DWORD [1+edi*8+ebp]
+ mov DWORD [4+esp],esi
+ mov esi,ebx
+ and esi,255
+ mov esi,DWORD [esi*8+ebp]
+ movzx edi,ah
+ xor esi,DWORD [3+edi*8+ebp]
+ mov edi,edx
+ shr edi,16
+ and edi,255
+ xor esi,DWORD [2+edi*8+ebp]
+ mov edi,ecx
+ shr edi,24
+ xor esi,DWORD [1+edi*8+ebp]
+ mov DWORD [8+esp],esi
+ mov esi,ecx
+ and esi,255
+ mov esi,DWORD [esi*8+ebp]
+ movzx edi,bh
+ xor esi,DWORD [3+edi*8+ebp]
+ mov edi,eax
+ shr edi,16
+ and edi,255
+ xor esi,DWORD [2+edi*8+ebp]
+ mov edi,edx
+ shr edi,24
+ xor esi,DWORD [1+edi*8+ebp]
+ mov edi,DWORD [20+esp]
+ and edx,255
+ mov edx,DWORD [edx*8+ebp]
+ movzx ecx,ch
+ xor edx,DWORD [3+ecx*8+ebp]
+ mov ecx,esi
+ shr ebx,16
+ and ebx,255
+ xor edx,DWORD [2+ebx*8+ebp]
+ mov ebx,DWORD [8+esp]
+ shr eax,24
+ xor edx,DWORD [1+eax*8+ebp]
+ mov eax,DWORD [4+esp]
+ add edi,16
+ xor eax,DWORD [edi]
+ xor ebx,DWORD [4+edi]
+ xor ecx,DWORD [8+edi]
+ xor edx,DWORD [12+edi]
+ cmp edi,DWORD [24+esp]
+ mov DWORD [20+esp],edi
+ jb NEAR L$009loop
+ lea ebp,[2176+ebp]
+ mov edi,DWORD [ebp-128]
+ mov esi,DWORD [ebp-96]
+ mov edi,DWORD [ebp-64]
+ mov esi,DWORD [ebp-32]
+ mov edi,DWORD [ebp]
+ mov esi,DWORD [32+ebp]
+ mov edi,DWORD [64+ebp]
+ mov esi,DWORD [96+ebp]
+ lea ebp,[ebp-128]
+ mov esi,eax
+ and esi,255
+ movzx esi,BYTE [esi*1+ebp]
+ movzx edi,dh
+ movzx edi,BYTE [edi*1+ebp]
+ shl edi,8
+ xor esi,edi
+ mov edi,ecx
+ shr edi,16
+ and edi,255
+ movzx edi,BYTE [edi*1+ebp]
+ shl edi,16
+ xor esi,edi
+ mov edi,ebx
+ shr edi,24
+ movzx edi,BYTE [edi*1+ebp]
+ shl edi,24
+ xor esi,edi
+ mov DWORD [4+esp],esi
+ mov esi,ebx
+ and esi,255
+ movzx esi,BYTE [esi*1+ebp]
+ movzx edi,ah
+ movzx edi,BYTE [edi*1+ebp]
+ shl edi,8
+ xor esi,edi
+ mov edi,edx
+ shr edi,16
+ and edi,255
+ movzx edi,BYTE [edi*1+ebp]
+ shl edi,16
+ xor esi,edi
+ mov edi,ecx
+ shr edi,24
+ movzx edi,BYTE [edi*1+ebp]
+ shl edi,24
+ xor esi,edi
+ mov DWORD [8+esp],esi
+ mov esi,ecx
+ and esi,255
+ movzx esi,BYTE [esi*1+ebp]
+ movzx edi,bh
+ movzx edi,BYTE [edi*1+ebp]
+ shl edi,8
+ xor esi,edi
+ mov edi,eax
+ shr edi,16
+ and edi,255
+ movzx edi,BYTE [edi*1+ebp]
+ shl edi,16
+ xor esi,edi
+ mov edi,edx
+ shr edi,24
+ movzx edi,BYTE [edi*1+ebp]
+ shl edi,24
+ xor esi,edi
+ mov edi,DWORD [20+esp]
+ and edx,255
+ movzx edx,BYTE [edx*1+ebp]
+ movzx ecx,ch
+ movzx ecx,BYTE [ecx*1+ebp]
+ shl ecx,8
+ xor edx,ecx
+ mov ecx,esi
+ shr ebx,16
+ and ebx,255
+ movzx ebx,BYTE [ebx*1+ebp]
+ shl ebx,16
+ xor edx,ebx
+ mov ebx,DWORD [8+esp]
+ shr eax,24
+ movzx eax,BYTE [eax*1+ebp]
+ shl eax,24
+ xor edx,eax
+ mov eax,DWORD [4+esp]
+ lea ebp,[ebp-2048]
+ add edi,16
+ xor eax,DWORD [edi]
+ xor ebx,DWORD [4+edi]
+ xor ecx,DWORD [8+edi]
+ xor edx,DWORD [12+edi]
+ ret
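+; L$AES_Td: decryption T-table -- 256 duplicated 32-bit entries, followed by
+; four 256-byte copies of the inverse S-box (no round constants are needed
+; here).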
+align 64
+L$AES_Td:
+dd 1353184337,1353184337
+dd 1399144830,1399144830
+dd 3282310938,3282310938
+dd 2522752826,2522752826
+dd 3412831035,3412831035
+dd 4047871263,4047871263
+dd 2874735276,2874735276
+dd 2466505547,2466505547
+dd 1442459680,1442459680
+dd 4134368941,4134368941
+dd 2440481928,2440481928
+dd 625738485,625738485
+dd 4242007375,4242007375
+dd 3620416197,3620416197
+dd 2151953702,2151953702
+dd 2409849525,2409849525
+dd 1230680542,1230680542
+dd 1729870373,1729870373
+dd 2551114309,2551114309
+dd 3787521629,3787521629
+dd 41234371,41234371
+dd 317738113,317738113
+dd 2744600205,2744600205
+dd 3338261355,3338261355
+dd 3881799427,3881799427
+dd 2510066197,2510066197
+dd 3950669247,3950669247
+dd 3663286933,3663286933
+dd 763608788,763608788
+dd 3542185048,3542185048
+dd 694804553,694804553
+dd 1154009486,1154009486
+dd 1787413109,1787413109
+dd 2021232372,2021232372
+dd 1799248025,1799248025
+dd 3715217703,3715217703
+dd 3058688446,3058688446
+dd 397248752,397248752
+dd 1722556617,1722556617
+dd 3023752829,3023752829
+dd 407560035,407560035
+dd 2184256229,2184256229
+dd 1613975959,1613975959
+dd 1165972322,1165972322
+dd 3765920945,3765920945
+dd 2226023355,2226023355
+dd 480281086,480281086
+dd 2485848313,2485848313
+dd 1483229296,1483229296
+dd 436028815,436028815
+dd 2272059028,2272059028
+dd 3086515026,3086515026
+dd 601060267,601060267
+dd 3791801202,3791801202
+dd 1468997603,1468997603
+dd 715871590,715871590
+dd 120122290,120122290
+dd 63092015,63092015
+dd 2591802758,2591802758
+dd 2768779219,2768779219
+dd 4068943920,4068943920
+dd 2997206819,2997206819
+dd 3127509762,3127509762
+dd 1552029421,1552029421
+dd 723308426,723308426
+dd 2461301159,2461301159
+dd 4042393587,4042393587
+dd 2715969870,2715969870
+dd 3455375973,3455375973
+dd 3586000134,3586000134
+dd 526529745,526529745
+dd 2331944644,2331944644
+dd 2639474228,2639474228
+dd 2689987490,2689987490
+dd 853641733,853641733
+dd 1978398372,1978398372
+dd 971801355,971801355
+dd 2867814464,2867814464
+dd 111112542,111112542
+dd 1360031421,1360031421
+dd 4186579262,4186579262
+dd 1023860118,1023860118
+dd 2919579357,2919579357
+dd 1186850381,1186850381
+dd 3045938321,3045938321
+dd 90031217,90031217
+dd 1876166148,1876166148
+dd 4279586912,4279586912
+dd 620468249,620468249
+dd 2548678102,2548678102
+dd 3426959497,3426959497
+dd 2006899047,2006899047
+dd 3175278768,3175278768
+dd 2290845959,2290845959
+dd 945494503,945494503
+dd 3689859193,3689859193
+dd 1191869601,1191869601
+dd 3910091388,3910091388
+dd 3374220536,3374220536
+dd 0,0
+dd 2206629897,2206629897
+dd 1223502642,1223502642
+dd 2893025566,2893025566
+dd 1316117100,1316117100
+dd 4227796733,4227796733
+dd 1446544655,1446544655
+dd 517320253,517320253
+dd 658058550,658058550
+dd 1691946762,1691946762
+dd 564550760,564550760
+dd 3511966619,3511966619
+dd 976107044,976107044
+dd 2976320012,2976320012
+dd 266819475,266819475
+dd 3533106868,3533106868
+dd 2660342555,2660342555
+dd 1338359936,1338359936
+dd 2720062561,2720062561
+dd 1766553434,1766553434
+dd 370807324,370807324
+dd 179999714,179999714
+dd 3844776128,3844776128
+dd 1138762300,1138762300
+dd 488053522,488053522
+dd 185403662,185403662
+dd 2915535858,2915535858
+dd 3114841645,3114841645
+dd 3366526484,3366526484
+dd 2233069911,2233069911
+dd 1275557295,1275557295
+dd 3151862254,3151862254
+dd 4250959779,4250959779
+dd 2670068215,2670068215
+dd 3170202204,3170202204
+dd 3309004356,3309004356
+dd 880737115,880737115
+dd 1982415755,1982415755
+dd 3703972811,3703972811
+dd 1761406390,1761406390
+dd 1676797112,1676797112
+dd 3403428311,3403428311
+dd 277177154,277177154
+dd 1076008723,1076008723
+dd 538035844,538035844
+dd 2099530373,2099530373
+dd 4164795346,4164795346
+dd 288553390,288553390
+dd 1839278535,1839278535
+dd 1261411869,1261411869
+dd 4080055004,4080055004
+dd 3964831245,3964831245
+dd 3504587127,3504587127
+dd 1813426987,1813426987
+dd 2579067049,2579067049
+dd 4199060497,4199060497
+dd 577038663,577038663
+dd 3297574056,3297574056
+dd 440397984,440397984
+dd 3626794326,3626794326
+dd 4019204898,4019204898
+dd 3343796615,3343796615
+dd 3251714265,3251714265
+dd 4272081548,4272081548
+dd 906744984,906744984
+dd 3481400742,3481400742
+dd 685669029,685669029
+dd 646887386,646887386
+dd 2764025151,2764025151
+dd 3835509292,3835509292
+dd 227702864,227702864
+dd 2613862250,2613862250
+dd 1648787028,1648787028
+dd 3256061430,3256061430
+dd 3904428176,3904428176
+dd 1593260334,1593260334
+dd 4121936770,4121936770
+dd 3196083615,3196083615
+dd 2090061929,2090061929
+dd 2838353263,2838353263
+dd 3004310991,3004310991
+dd 999926984,999926984
+dd 2809993232,2809993232
+dd 1852021992,1852021992
+dd 2075868123,2075868123
+dd 158869197,158869197
+dd 4095236462,4095236462
+dd 28809964,28809964
+dd 2828685187,2828685187
+dd 1701746150,1701746150
+dd 2129067946,2129067946
+dd 147831841,147831841
+dd 3873969647,3873969647
+dd 3650873274,3650873274
+dd 3459673930,3459673930
+dd 3557400554,3557400554
+dd 3598495785,3598495785
+dd 2947720241,2947720241
+dd 824393514,824393514
+dd 815048134,815048134
+dd 3227951669,3227951669
+dd 935087732,935087732
+dd 2798289660,2798289660
+dd 2966458592,2966458592
+dd 366520115,366520115
+dd 1251476721,1251476721
+dd 4158319681,4158319681
+dd 240176511,240176511
+dd 804688151,804688151
+dd 2379631990,2379631990
+dd 1303441219,1303441219
+dd 1414376140,1414376140
+dd 3741619940,3741619940
+dd 3820343710,3820343710
+dd 461924940,461924940
+dd 3089050817,3089050817
+dd 2136040774,2136040774
+dd 82468509,82468509
+dd 1563790337,1563790337
+dd 1937016826,1937016826
+dd 776014843,776014843
+dd 1511876531,1511876531
+dd 1389550482,1389550482
+dd 861278441,861278441
+dd 323475053,323475053
+dd 2355222426,2355222426
+dd 2047648055,2047648055
+dd 2383738969,2383738969
+dd 2302415851,2302415851
+dd 3995576782,3995576782
+dd 902390199,902390199
+dd 3991215329,3991215329
+dd 1018251130,1018251130
+dd 1507840668,1507840668
+dd 1064563285,1064563285
+dd 2043548696,2043548696
+dd 3208103795,3208103795
+dd 3939366739,3939366739
+dd 1537932639,1537932639
+dd 342834655,342834655
+dd 2262516856,2262516856
+dd 2180231114,2180231114
+dd 1053059257,1053059257
+dd 741614648,741614648
+dd 1598071746,1598071746
+dd 1925389590,1925389590
+dd 203809468,203809468
+dd 2336832552,2336832552
+dd 1100287487,1100287487
+dd 1895934009,1895934009
+dd 3736275976,3736275976
+dd 2632234200,2632234200
+dd 2428589668,2428589668
+dd 1636092795,1636092795
+dd 1890988757,1890988757
+dd 1952214088,1952214088
+dd 1113045200,1113045200
+db 82,9,106,213,48,54,165,56
+db 191,64,163,158,129,243,215,251
+db 124,227,57,130,155,47,255,135
+db 52,142,67,68,196,222,233,203
+db 84,123,148,50,166,194,35,61
+db 238,76,149,11,66,250,195,78
+db 8,46,161,102,40,217,36,178
+db 118,91,162,73,109,139,209,37
+db 114,248,246,100,134,104,152,22
+db 212,164,92,204,93,101,182,146
+db 108,112,72,80,253,237,185,218
+db 94,21,70,87,167,141,157,132
+db 144,216,171,0,140,188,211,10
+db 247,228,88,5,184,179,69,6
+db 208,44,30,143,202,63,15,2
+db 193,175,189,3,1,19,138,107
+db 58,145,17,65,79,103,220,234
+db 151,242,207,206,240,180,230,115
+db 150,172,116,34,231,173,53,133
+db 226,249,55,232,28,117,223,110
+db 71,241,26,113,29,41,197,137
+db 111,183,98,14,170,24,190,27
+db 252,86,62,75,198,210,121,32
+db 154,219,192,254,120,205,90,244
+db 31,221,168,51,136,7,199,49
+db 177,18,16,89,39,128,236,95
+db 96,81,127,169,25,181,74,13
+db 45,229,122,159,147,201,156,239
+db 160,224,59,77,174,42,245,176
+db 200,235,187,60,131,83,153,97
+db 23,43,4,126,186,119,214,38
+db 225,105,20,99,85,33,12,125
+db 82,9,106,213,48,54,165,56
+db 191,64,163,158,129,243,215,251
+db 124,227,57,130,155,47,255,135
+db 52,142,67,68,196,222,233,203
+db 84,123,148,50,166,194,35,61
+db 238,76,149,11,66,250,195,78
+db 8,46,161,102,40,217,36,178
+db 118,91,162,73,109,139,209,37
+db 114,248,246,100,134,104,152,22
+db 212,164,92,204,93,101,182,146
+db 108,112,72,80,253,237,185,218
+db 94,21,70,87,167,141,157,132
+db 144,216,171,0,140,188,211,10
+db 247,228,88,5,184,179,69,6
+db 208,44,30,143,202,63,15,2
+db 193,175,189,3,1,19,138,107
+db 58,145,17,65,79,103,220,234
+db 151,242,207,206,240,180,230,115
+db 150,172,116,34,231,173,53,133
+db 226,249,55,232,28,117,223,110
+db 71,241,26,113,29,41,197,137
+db 111,183,98,14,170,24,190,27
+db 252,86,62,75,198,210,121,32
+db 154,219,192,254,120,205,90,244
+db 31,221,168,51,136,7,199,49
+db 177,18,16,89,39,128,236,95
+db 96,81,127,169,25,181,74,13
+db 45,229,122,159,147,201,156,239
+db 160,224,59,77,174,42,245,176
+db 200,235,187,60,131,83,153,97
+db 23,43,4,126,186,119,214,38
+db 225,105,20,99,85,33,12,125
+db 82,9,106,213,48,54,165,56
+db 191,64,163,158,129,243,215,251
+db 124,227,57,130,155,47,255,135
+db 52,142,67,68,196,222,233,203
+db 84,123,148,50,166,194,35,61
+db 238,76,149,11,66,250,195,78
+db 8,46,161,102,40,217,36,178
+db 118,91,162,73,109,139,209,37
+db 114,248,246,100,134,104,152,22
+db 212,164,92,204,93,101,182,146
+db 108,112,72,80,253,237,185,218
+db 94,21,70,87,167,141,157,132
+db 144,216,171,0,140,188,211,10
+db 247,228,88,5,184,179,69,6
+db 208,44,30,143,202,63,15,2
+db 193,175,189,3,1,19,138,107
+db 58,145,17,65,79,103,220,234
+db 151,242,207,206,240,180,230,115
+db 150,172,116,34,231,173,53,133
+db 226,249,55,232,28,117,223,110
+db 71,241,26,113,29,41,197,137
+db 111,183,98,14,170,24,190,27
+db 252,86,62,75,198,210,121,32
+db 154,219,192,254,120,205,90,244
+db 31,221,168,51,136,7,199,49
+db 177,18,16,89,39,128,236,95
+db 96,81,127,169,25,181,74,13
+db 45,229,122,159,147,201,156,239
+db 160,224,59,77,174,42,245,176
+db 200,235,187,60,131,83,153,97
+db 23,43,4,126,186,119,214,38
+db 225,105,20,99,85,33,12,125
+db 82,9,106,213,48,54,165,56
+db 191,64,163,158,129,243,215,251
+db 124,227,57,130,155,47,255,135
+db 52,142,67,68,196,222,233,203
+db 84,123,148,50,166,194,35,61
+db 238,76,149,11,66,250,195,78
+db 8,46,161,102,40,217,36,178
+db 118,91,162,73,109,139,209,37
+db 114,248,246,100,134,104,152,22
+db 212,164,92,204,93,101,182,146
+db 108,112,72,80,253,237,185,218
+db 94,21,70,87,167,141,157,132
+db 144,216,171,0,140,188,211,10
+db 247,228,88,5,184,179,69,6
+db 208,44,30,143,202,63,15,2
+db 193,175,189,3,1,19,138,107
+db 58,145,17,65,79,103,220,234
+db 151,242,207,206,240,180,230,115
+db 150,172,116,34,231,173,53,133
+db 226,249,55,232,28,117,223,110
+db 71,241,26,113,29,41,197,137
+db 111,183,98,14,170,24,190,27
+db 252,86,62,75,198,210,121,32
+db 154,219,192,254,120,205,90,244
+db 31,221,168,51,136,7,199,49
+db 177,18,16,89,39,128,236,95
+db 96,81,127,169,25,181,74,13
+db 45,229,122,159,147,201,156,239
+db 160,224,59,77,174,42,245,176
+db 200,235,187,60,131,83,153,97
+db 23,43,4,126,186,119,214,38
+db 225,105,20,99,85,33,12,125
+global _asm_AES_decrypt
+align 16
+_asm_AES_decrypt:
+L$_asm_AES_decrypt_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov esi,DWORD [20+esp]
+ mov edi,DWORD [28+esp]
+ mov eax,esp
+ sub esp,36
+ and esp,-64
+ lea ebx,[edi-127]
+ sub ebx,esp
+ neg ebx
+ and ebx,960
+ sub esp,ebx
+ add esp,4
+ mov DWORD [28+esp],eax
+ call L$010pic_point
+L$010pic_point:
+ pop ebp
+ lea eax,[_OPENSSL_ia32cap_P]
+ lea ebp,[(L$AES_Td-L$010pic_point)+ebp]
+ lea ebx,[764+esp]
+ sub ebx,ebp
+ and ebx,768
+ lea ebp,[2176+ebx*1+ebp]
+ bt DWORD [eax],25
+ jnc NEAR L$011x86
+ movq mm0,[esi]
+ movq mm4,[8+esi]
+ call __sse_AES_decrypt_compact
+ mov esp,DWORD [28+esp]
+ mov esi,DWORD [24+esp]
+ movq [esi],mm0
+ movq [8+esi],mm4
+ emms
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+align 16
+L$011x86:
+ mov DWORD [24+esp],ebp
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [12+esi]
+ call __x86_AES_decrypt_compact
+ mov esp,DWORD [28+esp]
+ mov esi,DWORD [24+esp]
+ mov DWORD [esi],eax
+ mov DWORD [4+esi],ebx
+ mov DWORD [8+esi],ecx
+ mov DWORD [12+esi],edx
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
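+; asm_AES_cbc_encrypt(in, out, length, key, ivec, enc): CBC-mode entry point.
+; Inputs of at least 512 bytes that are a multiple of 16, on CPUs without the
+; hyper-threading flag (bit 28 of OPENSSL_ia32cap_P), take the full-table
+; "fast" path; everything else falls back to the compact routines.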
+global _asm_AES_cbc_encrypt
+align 16
+_asm_AES_cbc_encrypt:
+L$_asm_AES_cbc_encrypt_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov ecx,DWORD [28+esp]
+ cmp ecx,0
+ je NEAR L$012drop_out
+ call L$013pic_point
+L$013pic_point:
+ pop ebp
+ lea eax,[_OPENSSL_ia32cap_P]
+ cmp DWORD [40+esp],0
+ lea ebp,[(L$AES_Te-L$013pic_point)+ebp]
+ jne NEAR L$014picked_te
+ lea ebp,[(L$AES_Td-L$AES_Te)+ebp]
+L$014picked_te:
+ pushfd
+ cld
+ cmp ecx,512
+ jb NEAR L$015slow_way
+ test ecx,15
+ jnz NEAR L$015slow_way
+ bt DWORD [eax],28
+ jc NEAR L$015slow_way
+ lea esi,[esp-324]
+ and esi,-64
+ mov eax,ebp
+ lea ebx,[2304+ebp]
+ mov edx,esi
+ and eax,4095
+ and ebx,4095
+ and edx,4095
+ cmp edx,ebx
+ jb NEAR L$016tbl_break_out
+ sub edx,ebx
+ sub esi,edx
+ jmp NEAR L$017tbl_ok
+align 4
+L$016tbl_break_out:
+ sub edx,eax
+ and edx,4095
+ add edx,384
+ sub esi,edx
+align 4
+L$017tbl_ok:
+ lea edx,[24+esp]
+ xchg esp,esi
+ add esp,4
+ mov DWORD [24+esp],ebp
+ mov DWORD [28+esp],esi
+ mov eax,DWORD [edx]
+ mov ebx,DWORD [4+edx]
+ mov edi,DWORD [12+edx]
+ mov esi,DWORD [16+edx]
+ mov edx,DWORD [20+edx]
+ mov DWORD [32+esp],eax
+ mov DWORD [36+esp],ebx
+ mov DWORD [40+esp],ecx
+ mov DWORD [44+esp],edi
+ mov DWORD [48+esp],esi
+ mov DWORD [316+esp],0
+ mov ebx,edi
+ mov ecx,61
+ sub ebx,ebp
+ mov esi,edi
+ and ebx,4095
+ lea edi,[76+esp]
+ cmp ebx,2304
+ jb NEAR L$018do_copy
+ cmp ebx,3852
+ jb NEAR L$019skip_copy
+align 4
+L$018do_copy:
+ mov DWORD [44+esp],edi
+dd 2784229001
+L$019skip_copy:
+ mov edi,16
+align 4
+L$020prefetch_tbl:
+ mov eax,DWORD [ebp]
+ mov ebx,DWORD [32+ebp]
+ mov ecx,DWORD [64+ebp]
+ mov esi,DWORD [96+ebp]
+ lea ebp,[128+ebp]
+ sub edi,1
+ jnz NEAR L$020prefetch_tbl
+ sub ebp,2048
+ mov esi,DWORD [32+esp]
+ mov edi,DWORD [48+esp]
+ cmp edx,0
+ je NEAR L$021fast_decrypt
+ mov eax,DWORD [edi]
+ mov ebx,DWORD [4+edi]
+align 16
+L$022fast_enc_loop:
+ mov ecx,DWORD [8+edi]
+ mov edx,DWORD [12+edi]
+ xor eax,DWORD [esi]
+ xor ebx,DWORD [4+esi]
+ xor ecx,DWORD [8+esi]
+ xor edx,DWORD [12+esi]
+ mov edi,DWORD [44+esp]
+ call __x86_AES_encrypt
+ mov esi,DWORD [32+esp]
+ mov edi,DWORD [36+esp]
+ mov DWORD [edi],eax
+ mov DWORD [4+edi],ebx
+ mov DWORD [8+edi],ecx
+ mov DWORD [12+edi],edx
+ lea esi,[16+esi]
+ mov ecx,DWORD [40+esp]
+ mov DWORD [32+esp],esi
+ lea edx,[16+edi]
+ mov DWORD [36+esp],edx
+ sub ecx,16
+ mov DWORD [40+esp],ecx
+ jnz NEAR L$022fast_enc_loop
+ mov esi,DWORD [48+esp]
+ mov ecx,DWORD [8+edi]
+ mov edx,DWORD [12+edi]
+ mov DWORD [esi],eax
+ mov DWORD [4+esi],ebx
+ mov DWORD [8+esi],ecx
+ mov DWORD [12+esi],edx
+ cmp DWORD [316+esp],0
+ mov edi,DWORD [44+esp]
+ je NEAR L$023skip_ezero
+ mov ecx,60
+ xor eax,eax
+align 4
+dd 2884892297
+L$023skip_ezero:
+ mov esp,DWORD [28+esp]
+ popfd
+L$012drop_out:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+ pushfd
+align 16
+L$021fast_decrypt:
+ cmp esi,DWORD [36+esp]
+ je NEAR L$024fast_dec_in_place
+ mov DWORD [52+esp],edi
+align 4
+align 16
+L$025fast_dec_loop:
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [12+esi]
+ mov edi,DWORD [44+esp]
+ call __x86_AES_decrypt
+ mov edi,DWORD [52+esp]
+ mov esi,DWORD [40+esp]
+ xor eax,DWORD [edi]
+ xor ebx,DWORD [4+edi]
+ xor ecx,DWORD [8+edi]
+ xor edx,DWORD [12+edi]
+ mov edi,DWORD [36+esp]
+ mov esi,DWORD [32+esp]
+ mov DWORD [edi],eax
+ mov DWORD [4+edi],ebx
+ mov DWORD [8+edi],ecx
+ mov DWORD [12+edi],edx
+ mov ecx,DWORD [40+esp]
+ mov DWORD [52+esp],esi
+ lea esi,[16+esi]
+ mov DWORD [32+esp],esi
+ lea edi,[16+edi]
+ mov DWORD [36+esp],edi
+ sub ecx,16
+ mov DWORD [40+esp],ecx
+ jnz NEAR L$025fast_dec_loop
+ mov edi,DWORD [52+esp]
+ mov esi,DWORD [48+esp]
+ mov eax,DWORD [edi]
+ mov ebx,DWORD [4+edi]
+ mov ecx,DWORD [8+edi]
+ mov edx,DWORD [12+edi]
+ mov DWORD [esi],eax
+ mov DWORD [4+esi],ebx
+ mov DWORD [8+esi],ecx
+ mov DWORD [12+esi],edx
+ jmp NEAR L$026fast_dec_out
+align 16
+L$024fast_dec_in_place:
+L$027fast_dec_in_place_loop:
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [12+esi]
+ lea edi,[60+esp]
+ mov DWORD [edi],eax
+ mov DWORD [4+edi],ebx
+ mov DWORD [8+edi],ecx
+ mov DWORD [12+edi],edx
+ mov edi,DWORD [44+esp]
+ call __x86_AES_decrypt
+ mov edi,DWORD [48+esp]
+ mov esi,DWORD [36+esp]
+ xor eax,DWORD [edi]
+ xor ebx,DWORD [4+edi]
+ xor ecx,DWORD [8+edi]
+ xor edx,DWORD [12+edi]
+ mov DWORD [esi],eax
+ mov DWORD [4+esi],ebx
+ mov DWORD [8+esi],ecx
+ mov DWORD [12+esi],edx
+ lea esi,[16+esi]
+ mov DWORD [36+esp],esi
+ lea esi,[60+esp]
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [12+esi]
+ mov DWORD [edi],eax
+ mov DWORD [4+edi],ebx
+ mov DWORD [8+edi],ecx
+ mov DWORD [12+edi],edx
+ mov esi,DWORD [32+esp]
+ mov ecx,DWORD [40+esp]
+ lea esi,[16+esi]
+ mov DWORD [32+esp],esi
+ sub ecx,16
+ mov DWORD [40+esp],ecx
+ jnz NEAR L$027fast_dec_in_place_loop
+align 4
+L$026fast_dec_out:
+ cmp DWORD [316+esp],0
+ mov edi,DWORD [44+esp]
+ je NEAR L$028skip_dzero
+ mov ecx,60
+ xor eax,eax
+align 4
+dd 2884892297
+L$028skip_dzero:
+ mov esp,DWORD [28+esp]
+ popfd
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+ pushfd
+align 16
+L$015slow_way:
+ mov eax,DWORD [eax]
+ mov edi,DWORD [36+esp]
+ lea esi,[esp-80]
+ and esi,-64
+ lea ebx,[edi-143]
+ sub ebx,esi
+ neg ebx
+ and ebx,960
+ sub esi,ebx
+ lea ebx,[768+esi]
+ sub ebx,ebp
+ and ebx,768
+ lea ebp,[2176+ebx*1+ebp]
+ lea edx,[24+esp]
+ xchg esp,esi
+ add esp,4
+ mov DWORD [24+esp],ebp
+ mov DWORD [28+esp],esi
+ mov DWORD [52+esp],eax
+ mov eax,DWORD [edx]
+ mov ebx,DWORD [4+edx]
+ mov esi,DWORD [16+edx]
+ mov edx,DWORD [20+edx]
+ mov DWORD [32+esp],eax
+ mov DWORD [36+esp],ebx
+ mov DWORD [40+esp],ecx
+ mov DWORD [44+esp],edi
+ mov DWORD [48+esp],esi
+ mov edi,esi
+ mov esi,eax
+ cmp edx,0
+ je NEAR L$029slow_decrypt
+ cmp ecx,16
+ mov edx,ebx
+ jb NEAR L$030slow_enc_tail
+ bt DWORD [52+esp],25
+ jnc NEAR L$031slow_enc_x86
+ movq mm0,[edi]
+ movq mm4,[8+edi]
+align 16
+L$032slow_enc_loop_sse:
+ pxor mm0,[esi]
+ pxor mm4,[8+esi]
+ mov edi,DWORD [44+esp]
+ call __sse_AES_encrypt_compact
+ mov esi,DWORD [32+esp]
+ mov edi,DWORD [36+esp]
+ mov ecx,DWORD [40+esp]
+ movq [edi],mm0
+ movq [8+edi],mm4
+ lea esi,[16+esi]
+ mov DWORD [32+esp],esi
+ lea edx,[16+edi]
+ mov DWORD [36+esp],edx
+ sub ecx,16
+ cmp ecx,16
+ mov DWORD [40+esp],ecx
+ jae NEAR L$032slow_enc_loop_sse
+ test ecx,15
+ jnz NEAR L$030slow_enc_tail
+ mov esi,DWORD [48+esp]
+ movq [esi],mm0
+ movq [8+esi],mm4
+ emms
+ mov esp,DWORD [28+esp]
+ popfd
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+ pushfd
+align 16
+L$031slow_enc_x86:
+ mov eax,DWORD [edi]
+ mov ebx,DWORD [4+edi]
+align 4
+L$033slow_enc_loop_x86:
+ mov ecx,DWORD [8+edi]
+ mov edx,DWORD [12+edi]
+ xor eax,DWORD [esi]
+ xor ebx,DWORD [4+esi]
+ xor ecx,DWORD [8+esi]
+ xor edx,DWORD [12+esi]
+ mov edi,DWORD [44+esp]
+ call __x86_AES_encrypt_compact
+ mov esi,DWORD [32+esp]
+ mov edi,DWORD [36+esp]
+ mov DWORD [edi],eax
+ mov DWORD [4+edi],ebx
+ mov DWORD [8+edi],ecx
+ mov DWORD [12+edi],edx
+ mov ecx,DWORD [40+esp]
+ lea esi,[16+esi]
+ mov DWORD [32+esp],esi
+ lea edx,[16+edi]
+ mov DWORD [36+esp],edx
+ sub ecx,16
+ cmp ecx,16
+ mov DWORD [40+esp],ecx
+ jae NEAR L$033slow_enc_loop_x86
+ test ecx,15
+ jnz NEAR L$030slow_enc_tail
+ mov esi,DWORD [48+esp]
+ mov ecx,DWORD [8+edi]
+ mov edx,DWORD [12+edi]
+ mov DWORD [esi],eax
+ mov DWORD [4+esi],ebx
+ mov DWORD [8+esi],ecx
+ mov DWORD [12+esi],edx
+ mov esp,DWORD [28+esp]
+ popfd
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+ pushfd
+align 16
+L$030slow_enc_tail:
+ emms
+ mov edi,edx
+ mov ebx,16
+ sub ebx,ecx
+ cmp edi,esi
+ je NEAR L$034enc_in_place
+align 4
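+; 0xa4f3f689 below decodes to "mov esi,esi; rep movsb": copies the ecx-byte partial block from [esi] to [edi]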
+dd 2767451785
+ jmp NEAR L$035enc_skip_in_place
+L$034enc_in_place:
+ lea edi,[ecx*1+edi]
+L$035enc_skip_in_place:
+ mov ecx,ebx
+ xor eax,eax
+align 4
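+; 0xaaf3f689 below decodes to "mov esi,esi; rep stosb": zero-pads the block to a full 16 bytes (al=0, ecx=16-len)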
+dd 2868115081
+ mov edi,DWORD [48+esp]
+ mov esi,edx
+ mov eax,DWORD [edi]
+ mov ebx,DWORD [4+edi]
+ mov DWORD [40+esp],16
+ jmp NEAR L$033slow_enc_loop_x86
+align 16
+L$029slow_decrypt:
+ bt DWORD [52+esp],25
+ jnc NEAR L$036slow_dec_loop_x86
+align 4
+L$037slow_dec_loop_sse:
+ movq mm0,[esi]
+ movq mm4,[8+esi]
+ mov edi,DWORD [44+esp]
+ call __sse_AES_decrypt_compact
+ mov esi,DWORD [32+esp]
+ lea eax,[60+esp]
+ mov ebx,DWORD [36+esp]
+ mov ecx,DWORD [40+esp]
+ mov edi,DWORD [48+esp]
+ movq mm1,[esi]
+ movq mm5,[8+esi]
+ pxor mm0,[edi]
+ pxor mm4,[8+edi]
+ movq [edi],mm1
+ movq [8+edi],mm5
+ sub ecx,16
+ jc NEAR L$038slow_dec_partial_sse
+ movq [ebx],mm0
+ movq [8+ebx],mm4
+ lea ebx,[16+ebx]
+ mov DWORD [36+esp],ebx
+ lea esi,[16+esi]
+ mov DWORD [32+esp],esi
+ mov DWORD [40+esp],ecx
+ jnz NEAR L$037slow_dec_loop_sse
+ emms
+ mov esp,DWORD [28+esp]
+ popfd
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+ pushfd
+align 16
+L$038slow_dec_partial_sse:
+ movq [eax],mm0
+ movq [8+eax],mm4
+ emms
+ add ecx,16
+ mov edi,ebx
+ mov esi,eax
+align 4
+dd 2767451785
+ mov esp,DWORD [28+esp]
+ popfd
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+ pushfd
+align 16
+L$036slow_dec_loop_x86:
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [12+esi]
+ lea edi,[60+esp]
+ mov DWORD [edi],eax
+ mov DWORD [4+edi],ebx
+ mov DWORD [8+edi],ecx
+ mov DWORD [12+edi],edx
+ mov edi,DWORD [44+esp]
+ call __x86_AES_decrypt_compact
+ mov edi,DWORD [48+esp]
+ mov esi,DWORD [40+esp]
+ xor eax,DWORD [edi]
+ xor ebx,DWORD [4+edi]
+ xor ecx,DWORD [8+edi]
+ xor edx,DWORD [12+edi]
+ sub esi,16
+ jc NEAR L$039slow_dec_partial_x86
+ mov DWORD [40+esp],esi
+ mov esi,DWORD [36+esp]
+ mov DWORD [esi],eax
+ mov DWORD [4+esi],ebx
+ mov DWORD [8+esi],ecx
+ mov DWORD [12+esi],edx
+ lea esi,[16+esi]
+ mov DWORD [36+esp],esi
+ lea esi,[60+esp]
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [12+esi]
+ mov DWORD [edi],eax
+ mov DWORD [4+edi],ebx
+ mov DWORD [8+edi],ecx
+ mov DWORD [12+edi],edx
+ mov esi,DWORD [32+esp]
+ lea esi,[16+esi]
+ mov DWORD [32+esp],esi
+ jnz NEAR L$036slow_dec_loop_x86
+ mov esp,DWORD [28+esp]
+ popfd
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+ pushfd
+align 16
+L$039slow_dec_partial_x86:
+ lea esi,[60+esp]
+ mov DWORD [esi],eax
+ mov DWORD [4+esi],ebx
+ mov DWORD [8+esi],ecx
+ mov DWORD [12+esi],edx
+ mov esi,DWORD [32+esp]
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [12+esi]
+ mov DWORD [edi],eax
+ mov DWORD [4+edi],ebx
+ mov DWORD [8+edi],ecx
+ mov DWORD [12+edi],edx
+ mov ecx,DWORD [40+esp]
+ mov edi,DWORD [36+esp]
+ lea esi,[60+esp]
+align 4
+dd 2767451785
+ mov esp,DWORD [28+esp]
+ popfd
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+align 16
+__x86_AES_set_encrypt_key:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov esi,DWORD [24+esp]
+ mov edi,DWORD [32+esp]
+ test esi,-1
+ jz NEAR L$040badpointer
+ test edi,-1
+ jz NEAR L$040badpointer
+ call L$041pic_point
+L$041pic_point:
+ pop ebp
+ lea ebp,[(L$AES_Te-L$041pic_point)+ebp]
+ lea ebp,[2176+ebp]
+ mov eax,DWORD [ebp-128]
+ mov ebx,DWORD [ebp-96]
+ mov ecx,DWORD [ebp-64]
+ mov edx,DWORD [ebp-32]
+ mov eax,DWORD [ebp]
+ mov ebx,DWORD [32+ebp]
+ mov ecx,DWORD [64+ebp]
+ mov edx,DWORD [96+ebp]
+ mov ecx,DWORD [28+esp]
+ cmp ecx,128
+ je NEAR L$04210rounds
+ cmp ecx,192
+ je NEAR L$04312rounds
+ cmp ecx,256
+ je NEAR L$04414rounds
+ mov eax,-2
+ jmp NEAR L$045exit
+L$04210rounds:
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [12+esi]
+ mov DWORD [edi],eax
+ mov DWORD [4+edi],ebx
+ mov DWORD [8+edi],ecx
+ mov DWORD [12+edi],edx
+ xor ecx,ecx
+ jmp NEAR L$04610shortcut
+align 4
+L$04710loop:
+ mov eax,DWORD [edi]
+ mov edx,DWORD [12+edi]
+L$04610shortcut:
+ movzx esi,dl
+ movzx ebx,BYTE [esi*1+ebp-128]
+ movzx esi,dh
+ shl ebx,24
+ xor eax,ebx
+ movzx ebx,BYTE [esi*1+ebp-128]
+ shr edx,16
+ movzx esi,dl
+ xor eax,ebx
+ movzx ebx,BYTE [esi*1+ebp-128]
+ movzx esi,dh
+ shl ebx,8
+ xor eax,ebx
+ movzx ebx,BYTE [esi*1+ebp-128]
+ shl ebx,16
+ xor eax,ebx
+ xor eax,DWORD [896+ecx*4+ebp]
+ mov DWORD [16+edi],eax
+ xor eax,DWORD [4+edi]
+ mov DWORD [20+edi],eax
+ xor eax,DWORD [8+edi]
+ mov DWORD [24+edi],eax
+ xor eax,DWORD [12+edi]
+ mov DWORD [28+edi],eax
+ inc ecx
+ add edi,16
+ cmp ecx,10
+ jl NEAR L$04710loop
+ mov DWORD [80+edi],10
+ xor eax,eax
+ jmp NEAR L$045exit
+L$04312rounds:
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [12+esi]
+ mov DWORD [edi],eax
+ mov DWORD [4+edi],ebx
+ mov DWORD [8+edi],ecx
+ mov DWORD [12+edi],edx
+ mov ecx,DWORD [16+esi]
+ mov edx,DWORD [20+esi]
+ mov DWORD [16+edi],ecx
+ mov DWORD [20+edi],edx
+ xor ecx,ecx
+ jmp NEAR L$04812shortcut
+align 4
+L$04912loop:
+ mov eax,DWORD [edi]
+ mov edx,DWORD [20+edi]
+L$04812shortcut:
+ movzx esi,dl
+ movzx ebx,BYTE [esi*1+ebp-128]
+ movzx esi,dh
+ shl ebx,24
+ xor eax,ebx
+ movzx ebx,BYTE [esi*1+ebp-128]
+ shr edx,16
+ movzx esi,dl
+ xor eax,ebx
+ movzx ebx,BYTE [esi*1+ebp-128]
+ movzx esi,dh
+ shl ebx,8
+ xor eax,ebx
+ movzx ebx,BYTE [esi*1+ebp-128]
+ shl ebx,16
+ xor eax,ebx
+ xor eax,DWORD [896+ecx*4+ebp]
+ mov DWORD [24+edi],eax
+ xor eax,DWORD [4+edi]
+ mov DWORD [28+edi],eax
+ xor eax,DWORD [8+edi]
+ mov DWORD [32+edi],eax
+ xor eax,DWORD [12+edi]
+ mov DWORD [36+edi],eax
+ cmp ecx,7
+ je NEAR L$05012break
+ inc ecx
+ xor eax,DWORD [16+edi]
+ mov DWORD [40+edi],eax
+ xor eax,DWORD [20+edi]
+ mov DWORD [44+edi],eax
+ add edi,24
+ jmp NEAR L$04912loop
+L$05012break:
+ mov DWORD [72+edi],12
+ xor eax,eax
+ jmp NEAR L$045exit
+L$04414rounds:
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [12+esi]
+ mov DWORD [edi],eax
+ mov DWORD [4+edi],ebx
+ mov DWORD [8+edi],ecx
+ mov DWORD [12+edi],edx
+ mov eax,DWORD [16+esi]
+ mov ebx,DWORD [20+esi]
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [28+esi]
+ mov DWORD [16+edi],eax
+ mov DWORD [20+edi],ebx
+ mov DWORD [24+edi],ecx
+ mov DWORD [28+edi],edx
+ xor ecx,ecx
+ jmp NEAR L$05114shortcut
+align 4
+L$05214loop:
+ mov edx,DWORD [28+edi]
+L$05114shortcut:
+ mov eax,DWORD [edi]
+ movzx esi,dl
+ movzx ebx,BYTE [esi*1+ebp-128]
+ movzx esi,dh
+ shl ebx,24
+ xor eax,ebx
+ movzx ebx,BYTE [esi*1+ebp-128]
+ shr edx,16
+ movzx esi,dl
+ xor eax,ebx
+ movzx ebx,BYTE [esi*1+ebp-128]
+ movzx esi,dh
+ shl ebx,8
+ xor eax,ebx
+ movzx ebx,BYTE [esi*1+ebp-128]
+ shl ebx,16
+ xor eax,ebx
+ xor eax,DWORD [896+ecx*4+ebp]
+ mov DWORD [32+edi],eax
+ xor eax,DWORD [4+edi]
+ mov DWORD [36+edi],eax
+ xor eax,DWORD [8+edi]
+ mov DWORD [40+edi],eax
+ xor eax,DWORD [12+edi]
+ mov DWORD [44+edi],eax
+ cmp ecx,6
+ je NEAR L$05314break
+ inc ecx
+ mov edx,eax
+ mov eax,DWORD [16+edi]
+ movzx esi,dl
+ movzx ebx,BYTE [esi*1+ebp-128]
+ movzx esi,dh
+ xor eax,ebx
+ movzx ebx,BYTE [esi*1+ebp-128]
+ shr edx,16
+ shl ebx,8
+ movzx esi,dl
+ xor eax,ebx
+ movzx ebx,BYTE [esi*1+ebp-128]
+ movzx esi,dh
+ shl ebx,16
+ xor eax,ebx
+ movzx ebx,BYTE [esi*1+ebp-128]
+ shl ebx,24
+ xor eax,ebx
+ mov DWORD [48+edi],eax
+ xor eax,DWORD [20+edi]
+ mov DWORD [52+edi],eax
+ xor eax,DWORD [24+edi]
+ mov DWORD [56+edi],eax
+ xor eax,DWORD [28+edi]
+ mov DWORD [60+edi],eax
+ add edi,32
+ jmp NEAR L$05214loop
+L$05314break:
+ mov DWORD [48+edi],14
+ xor eax,eax
+ jmp NEAR L$045exit
+L$040badpointer:
+ mov eax,-1
+L$045exit:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _asm_AES_set_encrypt_key
+align 16
+_asm_AES_set_encrypt_key:
+L$_asm_AES_set_encrypt_key_begin:
+ call __x86_AES_set_encrypt_key
+ ret
+global _asm_AES_set_decrypt_key
+align 16
+_asm_AES_set_decrypt_key:
+L$_asm_AES_set_decrypt_key_begin:
+ call __x86_AES_set_encrypt_key
+ cmp eax,0
+ je NEAR L$054proceed
+ ret
+L$054proceed:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov esi,DWORD [28+esp]
+ mov ecx,DWORD [240+esi]
+ lea ecx,[ecx*4]
+ lea edi,[ecx*4+esi]
+align 4
+L$055invert:
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [edi]
+ mov edx,DWORD [4+edi]
+ mov DWORD [edi],eax
+ mov DWORD [4+edi],ebx
+ mov DWORD [esi],ecx
+ mov DWORD [4+esi],edx
+ mov eax,DWORD [8+esi]
+ mov ebx,DWORD [12+esi]
+ mov ecx,DWORD [8+edi]
+ mov edx,DWORD [12+edi]
+ mov DWORD [8+edi],eax
+ mov DWORD [12+edi],ebx
+ mov DWORD [8+esi],ecx
+ mov DWORD [12+esi],edx
+ add esi,16
+ sub edi,16
+ cmp esi,edi
+ jne NEAR L$055invert
+ mov edi,DWORD [28+esp]
+ mov esi,DWORD [240+edi]
+ lea esi,[esi*1+esi-2]
+ lea esi,[esi*8+edi]
+ mov DWORD [28+esp],esi
+ mov eax,DWORD [16+edi]
+align 4
+L$056permute:
+ add edi,16
+ mov ebp,2155905152
+ and ebp,eax
+ lea ebx,[eax*1+eax]
+ mov esi,ebp
+ shr ebp,7
+ sub esi,ebp
+ and ebx,4278124286
+ and esi,454761243
+ xor ebx,esi
+ mov ebp,2155905152
+ and ebp,ebx
+ lea ecx,[ebx*1+ebx]
+ mov esi,ebp
+ shr ebp,7
+ sub esi,ebp
+ and ecx,4278124286
+ and esi,454761243
+ xor ebx,eax
+ xor ecx,esi
+ mov ebp,2155905152
+ and ebp,ecx
+ lea edx,[ecx*1+ecx]
+ mov esi,ebp
+ shr ebp,7
+ xor ecx,eax
+ sub esi,ebp
+ and edx,4278124286
+ and esi,454761243
+ rol eax,8
+ xor edx,esi
+ mov ebp,DWORD [4+edi]
+ xor eax,ebx
+ xor ebx,edx
+ xor eax,ecx
+ rol ebx,24
+ xor ecx,edx
+ xor eax,edx
+ rol ecx,16
+ xor eax,ebx
+ rol edx,8
+ xor eax,ecx
+ mov ebx,ebp
+ xor eax,edx
+ mov DWORD [edi],eax
+ mov ebp,2155905152
+ and ebp,ebx
+ lea ecx,[ebx*1+ebx]
+ mov esi,ebp
+ shr ebp,7
+ sub esi,ebp
+ and ecx,4278124286
+ and esi,454761243
+ xor ecx,esi
+ mov ebp,2155905152
+ and ebp,ecx
+ lea edx,[ecx*1+ecx]
+ mov esi,ebp
+ shr ebp,7
+ sub esi,ebp
+ and edx,4278124286
+ and esi,454761243
+ xor ecx,ebx
+ xor edx,esi
+ mov ebp,2155905152
+ and ebp,edx
+ lea eax,[edx*1+edx]
+ mov esi,ebp
+ shr ebp,7
+ xor edx,ebx
+ sub esi,ebp
+ and eax,4278124286
+ and esi,454761243
+ rol ebx,8
+ xor eax,esi
+ mov ebp,DWORD [8+edi]
+ xor ebx,ecx
+ xor ecx,eax
+ xor ebx,edx
+ rol ecx,24
+ xor edx,eax
+ xor ebx,eax
+ rol edx,16
+ xor ebx,ecx
+ rol eax,8
+ xor ebx,edx
+ mov ecx,ebp
+ xor ebx,eax
+ mov DWORD [4+edi],ebx
+ mov ebp,2155905152
+ and ebp,ecx
+ lea edx,[ecx*1+ecx]
+ mov esi,ebp
+ shr ebp,7
+ sub esi,ebp
+ and edx,4278124286
+ and esi,454761243
+ xor edx,esi
+ mov ebp,2155905152
+ and ebp,edx
+ lea eax,[edx*1+edx]
+ mov esi,ebp
+ shr ebp,7
+ sub esi,ebp
+ and eax,4278124286
+ and esi,454761243
+ xor edx,ecx
+ xor eax,esi
+ mov ebp,2155905152
+ and ebp,eax
+ lea ebx,[eax*1+eax]
+ mov esi,ebp
+ shr ebp,7
+ xor eax,ecx
+ sub esi,ebp
+ and ebx,4278124286
+ and esi,454761243
+ rol ecx,8
+ xor ebx,esi
+ mov ebp,DWORD [12+edi]
+ xor ecx,edx
+ xor edx,ebx
+ xor ecx,eax
+ rol edx,24
+ xor eax,ebx
+ xor ecx,ebx
+ rol eax,16
+ xor ecx,edx
+ rol ebx,8
+ xor ecx,eax
+ mov edx,ebp
+ xor ecx,ebx
+ mov DWORD [8+edi],ecx
+ mov ebp,2155905152
+ and ebp,edx
+ lea eax,[edx*1+edx]
+ mov esi,ebp
+ shr ebp,7
+ sub esi,ebp
+ and eax,4278124286
+ and esi,454761243
+ xor eax,esi
+ mov ebp,2155905152
+ and ebp,eax
+ lea ebx,[eax*1+eax]
+ mov esi,ebp
+ shr ebp,7
+ sub esi,ebp
+ and ebx,4278124286
+ and esi,454761243
+ xor eax,edx
+ xor ebx,esi
+ mov ebp,2155905152
+ and ebp,ebx
+ lea ecx,[ebx*1+ebx]
+ mov esi,ebp
+ shr ebp,7
+ xor ebx,edx
+ sub esi,ebp
+ and ecx,4278124286
+ and esi,454761243
+ rol edx,8
+ xor ecx,esi
+ mov ebp,DWORD [16+edi]
+ xor edx,eax
+ xor eax,ecx
+ xor edx,ebx
+ rol eax,24
+ xor ebx,ecx
+ xor edx,ecx
+ rol ebx,16
+ xor edx,eax
+ rol ecx,8
+ xor edx,ebx
+ mov eax,ebp
+ xor edx,ecx
+ mov DWORD [12+edi],edx
+ cmp edi,DWORD [28+esp]
+ jb NEAR L$056permute
+ xor eax,eax
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
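+; the db string below spells the ASCII banner "AES for x86, CRYPTOGAMS by <appro@openssl.org>"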
+db 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
+db 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
+db 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+segment .bss
+common _OPENSSL_ia32cap_P 16
diff --git a/win-x86/crypto/aes/aesni-x86.asm b/win-x86/crypto/aes/aesni-x86.asm
new file mode 100644
index 0000000..a9a5956
--- /dev/null
+++ b/win-x86/crypto/aes/aesni-x86.asm
@@ -0,0 +1,2424 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+%ifdef __YASM_VERSION_ID__
+%if __YASM_VERSION_ID__ < 01010000h
+%error yasm version 1.1.0 or later needed.
+%endif
+; Yasm automatically includes .00 and complains about redefining it.
+; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
+%else
+$@feat.00 equ 1
+%endif
+section .text code align=64
+%else
+section .text code
+%endif
+;extern _OPENSSL_ia32cap_P
+global _aesni_encrypt
+align 16
+_aesni_encrypt:
+L$_aesni_encrypt_begin:
+ mov eax,DWORD [4+esp]
+ mov edx,DWORD [12+esp]
+ movups xmm2,[eax]
+ mov ecx,DWORD [240+edx]
+ mov eax,DWORD [8+esp]
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ lea edx,[32+edx]
+ xorps xmm2,xmm0
+L$000enc1_loop_1:
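+; db 102,15,56,220,209 encodes "aesenc xmm2,xmm1"; the 102,15,56,221,209 after the loop is "aesenclast xmm2,xmm1"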
+db 102,15,56,220,209
+ dec ecx
+ movups xmm1,[edx]
+ lea edx,[16+edx]
+ jnz NEAR L$000enc1_loop_1
+db 102,15,56,221,209
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ movups [eax],xmm2
+ pxor xmm2,xmm2
+ ret
+global _aesni_decrypt
+align 16
+_aesni_decrypt:
+L$_aesni_decrypt_begin:
+ mov eax,DWORD [4+esp]
+ mov edx,DWORD [12+esp]
+ movups xmm2,[eax]
+ mov ecx,DWORD [240+edx]
+ mov eax,DWORD [8+esp]
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ lea edx,[32+edx]
+ xorps xmm2,xmm0
+L$001dec1_loop_2:
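+; db 102,15,56,222,209 encodes "aesdec xmm2,xmm1"; the 102,15,56,223,209 after the loop is "aesdeclast xmm2,xmm1"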
+db 102,15,56,222,209
+ dec ecx
+ movups xmm1,[edx]
+ lea edx,[16+edx]
+ jnz NEAR L$001dec1_loop_2
+db 102,15,56,223,209
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ movups [eax],xmm2
+ pxor xmm2,xmm2
+ ret
+align 16
+__aesni_encrypt2:
+ movups xmm0,[edx]
+ shl ecx,4
+ movups xmm1,[16+edx]
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ movups xmm0,[32+edx]
+ lea edx,[32+ecx*1+edx]
+ neg ecx
+ add ecx,16
+L$002enc2_loop:
+db 102,15,56,220,209
+db 102,15,56,220,217
+ movups xmm1,[ecx*1+edx]
+ add ecx,32
+db 102,15,56,220,208
+db 102,15,56,220,216
+ movups xmm0,[ecx*1+edx-16]
+ jnz NEAR L$002enc2_loop
+db 102,15,56,220,209
+db 102,15,56,220,217
+db 102,15,56,221,208
+db 102,15,56,221,216
+ ret
+align 16
+__aesni_decrypt2:
+ movups xmm0,[edx]
+ shl ecx,4
+ movups xmm1,[16+edx]
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ movups xmm0,[32+edx]
+ lea edx,[32+ecx*1+edx]
+ neg ecx
+ add ecx,16
+L$003dec2_loop:
+db 102,15,56,222,209
+db 102,15,56,222,217
+ movups xmm1,[ecx*1+edx]
+ add ecx,32
+db 102,15,56,222,208
+db 102,15,56,222,216
+ movups xmm0,[ecx*1+edx-16]
+ jnz NEAR L$003dec2_loop
+db 102,15,56,222,209
+db 102,15,56,222,217
+db 102,15,56,223,208
+db 102,15,56,223,216
+ ret
+align 16
+__aesni_encrypt3:
+ movups xmm0,[edx]
+ shl ecx,4
+ movups xmm1,[16+edx]
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ pxor xmm4,xmm0
+ movups xmm0,[32+edx]
+ lea edx,[32+ecx*1+edx]
+ neg ecx
+ add ecx,16
+L$004enc3_loop:
+db 102,15,56,220,209
+db 102,15,56,220,217
+db 102,15,56,220,225
+ movups xmm1,[ecx*1+edx]
+ add ecx,32
+db 102,15,56,220,208
+db 102,15,56,220,216
+db 102,15,56,220,224
+ movups xmm0,[ecx*1+edx-16]
+ jnz NEAR L$004enc3_loop
+db 102,15,56,220,209
+db 102,15,56,220,217
+db 102,15,56,220,225
+db 102,15,56,221,208
+db 102,15,56,221,216
+db 102,15,56,221,224
+ ret
+align 16
+__aesni_decrypt3:
+ movups xmm0,[edx]
+ shl ecx,4
+ movups xmm1,[16+edx]
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ pxor xmm4,xmm0
+ movups xmm0,[32+edx]
+ lea edx,[32+ecx*1+edx]
+ neg ecx
+ add ecx,16
+L$005dec3_loop:
+db 102,15,56,222,209
+db 102,15,56,222,217
+db 102,15,56,222,225
+ movups xmm1,[ecx*1+edx]
+ add ecx,32
+db 102,15,56,222,208
+db 102,15,56,222,216
+db 102,15,56,222,224
+ movups xmm0,[ecx*1+edx-16]
+ jnz NEAR L$005dec3_loop
+db 102,15,56,222,209
+db 102,15,56,222,217
+db 102,15,56,222,225
+db 102,15,56,223,208
+db 102,15,56,223,216
+db 102,15,56,223,224
+ ret
+align 16
+__aesni_encrypt4:
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ shl ecx,4
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ pxor xmm4,xmm0
+ pxor xmm5,xmm0
+ movups xmm0,[32+edx]
+ lea edx,[32+ecx*1+edx]
+ neg ecx
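+; db 15,31,64,0 is a 4-byte NOP (nop DWORD [eax]), emitted here as padding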
+db 15,31,64,0
+ add ecx,16
+L$006enc4_loop:
+db 102,15,56,220,209
+db 102,15,56,220,217
+db 102,15,56,220,225
+db 102,15,56,220,233
+ movups xmm1,[ecx*1+edx]
+ add ecx,32
+db 102,15,56,220,208
+db 102,15,56,220,216
+db 102,15,56,220,224
+db 102,15,56,220,232
+ movups xmm0,[ecx*1+edx-16]
+ jnz NEAR L$006enc4_loop
+db 102,15,56,220,209
+db 102,15,56,220,217
+db 102,15,56,220,225
+db 102,15,56,220,233
+db 102,15,56,221,208
+db 102,15,56,221,216
+db 102,15,56,221,224
+db 102,15,56,221,232
+ ret
+align 16
+__aesni_decrypt4:
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ shl ecx,4
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ pxor xmm4,xmm0
+ pxor xmm5,xmm0
+ movups xmm0,[32+edx]
+ lea edx,[32+ecx*1+edx]
+ neg ecx
+db 15,31,64,0
+ add ecx,16
+L$007dec4_loop:
+db 102,15,56,222,209
+db 102,15,56,222,217
+db 102,15,56,222,225
+db 102,15,56,222,233
+ movups xmm1,[ecx*1+edx]
+ add ecx,32
+db 102,15,56,222,208
+db 102,15,56,222,216
+db 102,15,56,222,224
+db 102,15,56,222,232
+ movups xmm0,[ecx*1+edx-16]
+ jnz NEAR L$007dec4_loop
+db 102,15,56,222,209
+db 102,15,56,222,217
+db 102,15,56,222,225
+db 102,15,56,222,233
+db 102,15,56,223,208
+db 102,15,56,223,216
+db 102,15,56,223,224
+db 102,15,56,223,232
+ ret
+align 16
+__aesni_encrypt6:
+ movups xmm0,[edx]
+ shl ecx,4
+ movups xmm1,[16+edx]
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ pxor xmm4,xmm0
+db 102,15,56,220,209
+ pxor xmm5,xmm0
+ pxor xmm6,xmm0
+db 102,15,56,220,217
+ lea edx,[32+ecx*1+edx]
+ neg ecx
+db 102,15,56,220,225
+ pxor xmm7,xmm0
+ movups xmm0,[ecx*1+edx]
+ add ecx,16
+ jmp NEAR L$008_aesni_encrypt6_inner
+align 16
+L$009enc6_loop:
+db 102,15,56,220,209
+db 102,15,56,220,217
+db 102,15,56,220,225
+L$008_aesni_encrypt6_inner:
+db 102,15,56,220,233
+db 102,15,56,220,241
+db 102,15,56,220,249
+L$_aesni_encrypt6_enter:
+ movups xmm1,[ecx*1+edx]
+ add ecx,32
+db 102,15,56,220,208
+db 102,15,56,220,216
+db 102,15,56,220,224
+db 102,15,56,220,232
+db 102,15,56,220,240
+db 102,15,56,220,248
+ movups xmm0,[ecx*1+edx-16]
+ jnz NEAR L$009enc6_loop
+db 102,15,56,220,209
+db 102,15,56,220,217
+db 102,15,56,220,225
+db 102,15,56,220,233
+db 102,15,56,220,241
+db 102,15,56,220,249
+db 102,15,56,221,208
+db 102,15,56,221,216
+db 102,15,56,221,224
+db 102,15,56,221,232
+db 102,15,56,221,240
+db 102,15,56,221,248
+ ret
+align 16
+__aesni_decrypt6:
+ movups xmm0,[edx]
+ shl ecx,4
+ movups xmm1,[16+edx]
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ pxor xmm4,xmm0
+db 102,15,56,222,209
+ pxor xmm5,xmm0
+ pxor xmm6,xmm0
+db 102,15,56,222,217
+ lea edx,[32+ecx*1+edx]
+ neg ecx
+db 102,15,56,222,225
+ pxor xmm7,xmm0
+ movups xmm0,[ecx*1+edx]
+ add ecx,16
+ jmp NEAR L$010_aesni_decrypt6_inner
+align 16
+L$011dec6_loop:
+db 102,15,56,222,209
+db 102,15,56,222,217
+db 102,15,56,222,225
+L$010_aesni_decrypt6_inner:
+db 102,15,56,222,233
+db 102,15,56,222,241
+db 102,15,56,222,249
+L$_aesni_decrypt6_enter:
+ movups xmm1,[ecx*1+edx]
+ add ecx,32
+db 102,15,56,222,208
+db 102,15,56,222,216
+db 102,15,56,222,224
+db 102,15,56,222,232
+db 102,15,56,222,240
+db 102,15,56,222,248
+ movups xmm0,[ecx*1+edx-16]
+ jnz NEAR L$011dec6_loop
+db 102,15,56,222,209
+db 102,15,56,222,217
+db 102,15,56,222,225
+db 102,15,56,222,233
+db 102,15,56,222,241
+db 102,15,56,222,249
+db 102,15,56,223,208
+db 102,15,56,223,216
+db 102,15,56,223,224
+db 102,15,56,223,232
+db 102,15,56,223,240
+db 102,15,56,223,248
+ ret
+global _aesni_ecb_encrypt
+align 16
+_aesni_ecb_encrypt:
+L$_aesni_ecb_encrypt_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov esi,DWORD [20+esp]
+ mov edi,DWORD [24+esp]
+ mov eax,DWORD [28+esp]
+ mov edx,DWORD [32+esp]
+ mov ebx,DWORD [36+esp]
+ and eax,-16
+ jz NEAR L$012ecb_ret
+ mov ecx,DWORD [240+edx]
+ test ebx,ebx
+ jz NEAR L$013ecb_decrypt
+ mov ebp,edx
+ mov ebx,ecx
+ cmp eax,96
+ jb NEAR L$014ecb_enc_tail
+ movdqu xmm2,[esi]
+ movdqu xmm3,[16+esi]
+ movdqu xmm4,[32+esi]
+ movdqu xmm5,[48+esi]
+ movdqu xmm6,[64+esi]
+ movdqu xmm7,[80+esi]
+ lea esi,[96+esi]
+ sub eax,96
+ jmp NEAR L$015ecb_enc_loop6_enter
+align 16
+L$016ecb_enc_loop6:
+ movups [edi],xmm2
+ movdqu xmm2,[esi]
+ movups [16+edi],xmm3
+ movdqu xmm3,[16+esi]
+ movups [32+edi],xmm4
+ movdqu xmm4,[32+esi]
+ movups [48+edi],xmm5
+ movdqu xmm5,[48+esi]
+ movups [64+edi],xmm6
+ movdqu xmm6,[64+esi]
+ movups [80+edi],xmm7
+ lea edi,[96+edi]
+ movdqu xmm7,[80+esi]
+ lea esi,[96+esi]
+L$015ecb_enc_loop6_enter:
+ call __aesni_encrypt6
+ mov edx,ebp
+ mov ecx,ebx
+ sub eax,96
+ jnc NEAR L$016ecb_enc_loop6
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ movups [32+edi],xmm4
+ movups [48+edi],xmm5
+ movups [64+edi],xmm6
+ movups [80+edi],xmm7
+ lea edi,[96+edi]
+ add eax,96
+ jz NEAR L$012ecb_ret
+L$014ecb_enc_tail:
+ movups xmm2,[esi]
+ cmp eax,32
+ jb NEAR L$017ecb_enc_one
+ movups xmm3,[16+esi]
+ je NEAR L$018ecb_enc_two
+ movups xmm4,[32+esi]
+ cmp eax,64
+ jb NEAR L$019ecb_enc_three
+ movups xmm5,[48+esi]
+ je NEAR L$020ecb_enc_four
+ movups xmm6,[64+esi]
+ xorps xmm7,xmm7
+ call __aesni_encrypt6
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ movups [32+edi],xmm4
+ movups [48+edi],xmm5
+ movups [64+edi],xmm6
+ jmp NEAR L$012ecb_ret
+align 16
+L$017ecb_enc_one:
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ lea edx,[32+edx]
+ xorps xmm2,xmm0
+L$021enc1_loop_3:
+db 102,15,56,220,209
+ dec ecx
+ movups xmm1,[edx]
+ lea edx,[16+edx]
+ jnz NEAR L$021enc1_loop_3
+db 102,15,56,221,209
+ movups [edi],xmm2
+ jmp NEAR L$012ecb_ret
+align 16
+L$018ecb_enc_two:
+ call __aesni_encrypt2
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ jmp NEAR L$012ecb_ret
+align 16
+L$019ecb_enc_three:
+ call __aesni_encrypt3
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ movups [32+edi],xmm4
+ jmp NEAR L$012ecb_ret
+align 16
+L$020ecb_enc_four:
+ call __aesni_encrypt4
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ movups [32+edi],xmm4
+ movups [48+edi],xmm5
+ jmp NEAR L$012ecb_ret
+align 16
+L$013ecb_decrypt:
+ mov ebp,edx
+ mov ebx,ecx
+ cmp eax,96
+ jb NEAR L$022ecb_dec_tail
+ movdqu xmm2,[esi]
+ movdqu xmm3,[16+esi]
+ movdqu xmm4,[32+esi]
+ movdqu xmm5,[48+esi]
+ movdqu xmm6,[64+esi]
+ movdqu xmm7,[80+esi]
+ lea esi,[96+esi]
+ sub eax,96
+ jmp NEAR L$023ecb_dec_loop6_enter
+align 16
+L$024ecb_dec_loop6:
+ movups [edi],xmm2
+ movdqu xmm2,[esi]
+ movups [16+edi],xmm3
+ movdqu xmm3,[16+esi]
+ movups [32+edi],xmm4
+ movdqu xmm4,[32+esi]
+ movups [48+edi],xmm5
+ movdqu xmm5,[48+esi]
+ movups [64+edi],xmm6
+ movdqu xmm6,[64+esi]
+ movups [80+edi],xmm7
+ lea edi,[96+edi]
+ movdqu xmm7,[80+esi]
+ lea esi,[96+esi]
+L$023ecb_dec_loop6_enter:
+ call __aesni_decrypt6
+ mov edx,ebp
+ mov ecx,ebx
+ sub eax,96
+ jnc NEAR L$024ecb_dec_loop6
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ movups [32+edi],xmm4
+ movups [48+edi],xmm5
+ movups [64+edi],xmm6
+ movups [80+edi],xmm7
+ lea edi,[96+edi]
+ add eax,96
+ jz NEAR L$012ecb_ret
+L$022ecb_dec_tail:
+ movups xmm2,[esi]
+ cmp eax,32
+ jb NEAR L$025ecb_dec_one
+ movups xmm3,[16+esi]
+ je NEAR L$026ecb_dec_two
+ movups xmm4,[32+esi]
+ cmp eax,64
+ jb NEAR L$027ecb_dec_three
+ movups xmm5,[48+esi]
+ je NEAR L$028ecb_dec_four
+ movups xmm6,[64+esi]
+ xorps xmm7,xmm7
+ call __aesni_decrypt6
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ movups [32+edi],xmm4
+ movups [48+edi],xmm5
+ movups [64+edi],xmm6
+ jmp NEAR L$012ecb_ret
+align 16
+L$025ecb_dec_one:
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ lea edx,[32+edx]
+ xorps xmm2,xmm0
+L$029dec1_loop_4:
+db 102,15,56,222,209
+ dec ecx
+ movups xmm1,[edx]
+ lea edx,[16+edx]
+ jnz NEAR L$029dec1_loop_4
+db 102,15,56,223,209
+ movups [edi],xmm2
+ jmp NEAR L$012ecb_ret
+align 16
+L$026ecb_dec_two:
+ call __aesni_decrypt2
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ jmp NEAR L$012ecb_ret
+align 16
+L$027ecb_dec_three:
+ call __aesni_decrypt3
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ movups [32+edi],xmm4
+ jmp NEAR L$012ecb_ret
+align 16
+L$028ecb_dec_four:
+ call __aesni_decrypt4
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ movups [32+edi],xmm4
+ movups [48+edi],xmm5
+L$012ecb_ret:
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ pxor xmm6,xmm6
+ pxor xmm7,xmm7
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _aesni_ccm64_encrypt_blocks
+align 16
+_aesni_ccm64_encrypt_blocks:
+L$_aesni_ccm64_encrypt_blocks_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov esi,DWORD [20+esp]
+ mov edi,DWORD [24+esp]
+ mov eax,DWORD [28+esp]
+ mov edx,DWORD [32+esp]
+ mov ebx,DWORD [36+esp]
+ mov ecx,DWORD [40+esp]
+ mov ebp,esp
+ sub esp,60
+ and esp,-16
+ mov DWORD [48+esp],ebp
+ movdqu xmm7,[ebx]
+ movdqu xmm3,[ecx]
+ mov ecx,DWORD [240+edx]
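+; the four stores below (0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203) build a 16-byte byte-swap mask at [esp]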
+ mov DWORD [esp],202182159
+ mov DWORD [4+esp],134810123
+ mov DWORD [8+esp],67438087
+ mov DWORD [12+esp],66051
+ mov ebx,1
+ xor ebp,ebp
+ mov DWORD [16+esp],ebx
+ mov DWORD [20+esp],ebp
+ mov DWORD [24+esp],ebp
+ mov DWORD [28+esp],ebp
+ shl ecx,4
+ mov ebx,16
+ lea ebp,[edx]
+ movdqa xmm5,[esp]
+ movdqa xmm2,xmm7
+ lea edx,[32+ecx*1+edx]
+ sub ebx,ecx
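+; db 102,15,56,0,253 encodes "pshufb xmm7,xmm5": byte-reverses the counter block using the mask loaded above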
+db 102,15,56,0,253
+L$030ccm64_enc_outer:
+ movups xmm0,[ebp]
+ mov ecx,ebx
+ movups xmm6,[esi]
+ xorps xmm2,xmm0
+ movups xmm1,[16+ebp]
+ xorps xmm0,xmm6
+ xorps xmm3,xmm0
+ movups xmm0,[32+ebp]
+L$031ccm64_enc2_loop:
+db 102,15,56,220,209
+db 102,15,56,220,217
+ movups xmm1,[ecx*1+edx]
+ add ecx,32
+db 102,15,56,220,208
+db 102,15,56,220,216
+ movups xmm0,[ecx*1+edx-16]
+ jnz NEAR L$031ccm64_enc2_loop
+db 102,15,56,220,209
+db 102,15,56,220,217
+ paddq xmm7,[16+esp]
+ dec eax
+db 102,15,56,221,208
+db 102,15,56,221,216
+ lea esi,[16+esi]
+ xorps xmm6,xmm2
+ movdqa xmm2,xmm7
+ movups [edi],xmm6
+db 102,15,56,0,213
+ lea edi,[16+edi]
+ jnz NEAR L$030ccm64_enc_outer
+ mov esp,DWORD [48+esp]
+ mov edi,DWORD [40+esp]
+ movups [edi],xmm3
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ pxor xmm6,xmm6
+ pxor xmm7,xmm7
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _aesni_ccm64_decrypt_blocks
+align 16
+_aesni_ccm64_decrypt_blocks:
+L$_aesni_ccm64_decrypt_blocks_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov esi,DWORD [20+esp]
+ mov edi,DWORD [24+esp]
+ mov eax,DWORD [28+esp]
+ mov edx,DWORD [32+esp]
+ mov ebx,DWORD [36+esp]
+ mov ecx,DWORD [40+esp]
+ mov ebp,esp
+ sub esp,60
+ and esp,-16
+ mov DWORD [48+esp],ebp
+ movdqu xmm7,[ebx]
+ movdqu xmm3,[ecx]
+ mov ecx,DWORD [240+edx]
+ mov DWORD [esp],202182159
+ mov DWORD [4+esp],134810123
+ mov DWORD [8+esp],67438087
+ mov DWORD [12+esp],66051
+ mov ebx,1
+ xor ebp,ebp
+ mov DWORD [16+esp],ebx
+ mov DWORD [20+esp],ebp
+ mov DWORD [24+esp],ebp
+ mov DWORD [28+esp],ebp
+ movdqa xmm5,[esp]
+ movdqa xmm2,xmm7
+ mov ebp,edx
+ mov ebx,ecx
+db 102,15,56,0,253
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ lea edx,[32+edx]
+ xorps xmm2,xmm0
+L$032enc1_loop_5:
+db 102,15,56,220,209
+ dec ecx
+ movups xmm1,[edx]
+ lea edx,[16+edx]
+ jnz NEAR L$032enc1_loop_5
+db 102,15,56,221,209
+ shl ebx,4
+ mov ecx,16
+ movups xmm6,[esi]
+ paddq xmm7,[16+esp]
+ lea esi,[16+esi]
+ sub ecx,ebx
+ lea edx,[32+ebx*1+ebp]
+ mov ebx,ecx
+ jmp NEAR L$033ccm64_dec_outer
+align 16
+L$033ccm64_dec_outer:
+ xorps xmm6,xmm2
+ movdqa xmm2,xmm7
+ movups [edi],xmm6
+ lea edi,[16+edi]
+db 102,15,56,0,213
+ sub eax,1
+ jz NEAR L$034ccm64_dec_break
+ movups xmm0,[ebp]
+ mov ecx,ebx
+ movups xmm1,[16+ebp]
+ xorps xmm6,xmm0
+ xorps xmm2,xmm0
+ xorps xmm3,xmm6
+ movups xmm0,[32+ebp]
+L$035ccm64_dec2_loop:
+db 102,15,56,220,209
+db 102,15,56,220,217
+ movups xmm1,[ecx*1+edx]
+ add ecx,32
+db 102,15,56,220,208
+db 102,15,56,220,216
+ movups xmm0,[ecx*1+edx-16]
+ jnz NEAR L$035ccm64_dec2_loop
+ movups xmm6,[esi]
+ paddq xmm7,[16+esp]
+db 102,15,56,220,209
+db 102,15,56,220,217
+db 102,15,56,221,208
+db 102,15,56,221,216
+ lea esi,[16+esi]
+ jmp NEAR L$033ccm64_dec_outer
+align 16
+L$034ccm64_dec_break:
+ mov ecx,DWORD [240+ebp]
+ mov edx,ebp
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ xorps xmm6,xmm0
+ lea edx,[32+edx]
+ xorps xmm3,xmm6
+L$036enc1_loop_6:
+db 102,15,56,220,217
+ dec ecx
+ movups xmm1,[edx]
+ lea edx,[16+edx]
+ jnz NEAR L$036enc1_loop_6
+db 102,15,56,221,217
+ mov esp,DWORD [48+esp]
+ mov edi,DWORD [40+esp]
+ movups [edi],xmm3
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ pxor xmm6,xmm6
+ pxor xmm7,xmm7
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _aesni_ctr32_encrypt_blocks
+align 16
+_aesni_ctr32_encrypt_blocks:
+L$_aesni_ctr32_encrypt_blocks_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov esi,DWORD [20+esp]
+ mov edi,DWORD [24+esp]
+ mov eax,DWORD [28+esp]
+ mov edx,DWORD [32+esp]
+ mov ebx,DWORD [36+esp]
+ mov ebp,esp
+ sub esp,88
+ and esp,-16
+ mov DWORD [80+esp],ebp
+ cmp eax,1
+ je NEAR L$037ctr32_one_shortcut
+ movdqu xmm7,[ebx]
+ mov DWORD [esp],202182159
+ mov DWORD [4+esp],134810123
+ mov DWORD [8+esp],67438087
+ mov DWORD [12+esp],66051
+ mov ecx,6
+ xor ebp,ebp
+ mov DWORD [16+esp],ecx
+ mov DWORD [20+esp],ecx
+ mov DWORD [24+esp],ecx
+ mov DWORD [28+esp],ebp
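+; db 102,15,58,22,251,3 is "pextrd ebx,xmm7,3" (grab the big-endian counter word); 102,15,58,34,253,3 is "pinsrd xmm7,ebp,3" (clear it, ebp is zero here)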
+db 102,15,58,22,251,3
+db 102,15,58,34,253,3
+ mov ecx,DWORD [240+edx]
+ bswap ebx
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ movdqa xmm2,[esp]
+db 102,15,58,34,195,0
+ lea ebp,[3+ebx]
+db 102,15,58,34,205,0
+ inc ebx
+db 102,15,58,34,195,1
+ inc ebp
+db 102,15,58,34,205,1
+ inc ebx
+db 102,15,58,34,195,2
+ inc ebp
+db 102,15,58,34,205,2
+ movdqa [48+esp],xmm0
+db 102,15,56,0,194
+ movdqu xmm6,[edx]
+ movdqa [64+esp],xmm1
+db 102,15,56,0,202
+ pshufd xmm2,xmm0,192
+ pshufd xmm3,xmm0,128
+ cmp eax,6
+ jb NEAR L$038ctr32_tail
+ pxor xmm7,xmm6
+ shl ecx,4
+ mov ebx,16
+ movdqa [32+esp],xmm7
+ mov ebp,edx
+ sub ebx,ecx
+ lea edx,[32+ecx*1+edx]
+ sub eax,6
+ jmp NEAR L$039ctr32_loop6
+align 16
+L$039ctr32_loop6:
+ pshufd xmm4,xmm0,64
+ movdqa xmm0,[32+esp]
+ pshufd xmm5,xmm1,192
+ pxor xmm2,xmm0
+ pshufd xmm6,xmm1,128
+ pxor xmm3,xmm0
+ pshufd xmm7,xmm1,64
+ movups xmm1,[16+ebp]
+ pxor xmm4,xmm0
+ pxor xmm5,xmm0
+db 102,15,56,220,209
+ pxor xmm6,xmm0
+ pxor xmm7,xmm0
+db 102,15,56,220,217
+ movups xmm0,[32+ebp]
+ mov ecx,ebx
+db 102,15,56,220,225
+db 102,15,56,220,233
+db 102,15,56,220,241
+db 102,15,56,220,249
+ call L$_aesni_encrypt6_enter
+ movups xmm1,[esi]
+ movups xmm0,[16+esi]
+ xorps xmm2,xmm1
+ movups xmm1,[32+esi]
+ xorps xmm3,xmm0
+ movups [edi],xmm2
+ movdqa xmm0,[16+esp]
+ xorps xmm4,xmm1
+ movdqa xmm1,[64+esp]
+ movups [16+edi],xmm3
+ movups [32+edi],xmm4
+ paddd xmm1,xmm0
+ paddd xmm0,[48+esp]
+ movdqa xmm2,[esp]
+ movups xmm3,[48+esi]
+ movups xmm4,[64+esi]
+ xorps xmm5,xmm3
+ movups xmm3,[80+esi]
+ lea esi,[96+esi]
+ movdqa [48+esp],xmm0
+db 102,15,56,0,194
+ xorps xmm6,xmm4
+ movups [48+edi],xmm5
+ xorps xmm7,xmm3
+ movdqa [64+esp],xmm1
+db 102,15,56,0,202
+ movups [64+edi],xmm6
+ pshufd xmm2,xmm0,192
+ movups [80+edi],xmm7
+ lea edi,[96+edi]
+ pshufd xmm3,xmm0,128
+ sub eax,6
+ jnc NEAR L$039ctr32_loop6
+ add eax,6
+ jz NEAR L$040ctr32_ret
+ movdqu xmm7,[ebp]
+ mov edx,ebp
+ pxor xmm7,[32+esp]
+ mov ecx,DWORD [240+ebp]
+L$038ctr32_tail:
+ por xmm2,xmm7
+ cmp eax,2
+ jb NEAR L$041ctr32_one
+ pshufd xmm4,xmm0,64
+ por xmm3,xmm7
+ je NEAR L$042ctr32_two
+ pshufd xmm5,xmm1,192
+ por xmm4,xmm7
+ cmp eax,4
+ jb NEAR L$043ctr32_three
+ pshufd xmm6,xmm1,128
+ por xmm5,xmm7
+ je NEAR L$044ctr32_four
+ por xmm6,xmm7
+ call __aesni_encrypt6
+ movups xmm1,[esi]
+ movups xmm0,[16+esi]
+ xorps xmm2,xmm1
+ movups xmm1,[32+esi]
+ xorps xmm3,xmm0
+ movups xmm0,[48+esi]
+ xorps xmm4,xmm1
+ movups xmm1,[64+esi]
+ xorps xmm5,xmm0
+ movups [edi],xmm2
+ xorps xmm6,xmm1
+ movups [16+edi],xmm3
+ movups [32+edi],xmm4
+ movups [48+edi],xmm5
+ movups [64+edi],xmm6
+ jmp NEAR L$040ctr32_ret
+align 16
+L$037ctr32_one_shortcut:
+ movups xmm2,[ebx]
+ mov ecx,DWORD [240+edx]
+L$041ctr32_one:
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ lea edx,[32+edx]
+ xorps xmm2,xmm0
+L$045enc1_loop_7:
+db 102,15,56,220,209
+ dec ecx
+ movups xmm1,[edx]
+ lea edx,[16+edx]
+ jnz NEAR L$045enc1_loop_7
+db 102,15,56,221,209
+ movups xmm6,[esi]
+ xorps xmm6,xmm2
+ movups [edi],xmm6
+ jmp NEAR L$040ctr32_ret
+align 16
+L$042ctr32_two:
+ call __aesni_encrypt2
+ movups xmm5,[esi]
+ movups xmm6,[16+esi]
+ xorps xmm2,xmm5
+ xorps xmm3,xmm6
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ jmp NEAR L$040ctr32_ret
+align 16
+L$043ctr32_three:
+ call __aesni_encrypt3
+ movups xmm5,[esi]
+ movups xmm6,[16+esi]
+ xorps xmm2,xmm5
+ movups xmm7,[32+esi]
+ xorps xmm3,xmm6
+ movups [edi],xmm2
+ xorps xmm4,xmm7
+ movups [16+edi],xmm3
+ movups [32+edi],xmm4
+ jmp NEAR L$040ctr32_ret
+align 16
+L$044ctr32_four:
+ call __aesni_encrypt4
+ movups xmm6,[esi]
+ movups xmm7,[16+esi]
+ movups xmm1,[32+esi]
+ xorps xmm2,xmm6
+ movups xmm0,[48+esi]
+ xorps xmm3,xmm7
+ movups [edi],xmm2
+ xorps xmm4,xmm1
+ movups [16+edi],xmm3
+ xorps xmm5,xmm0
+ movups [32+edi],xmm4
+ movups [48+edi],xmm5
+L$040ctr32_ret:
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ movdqa [32+esp],xmm0
+ pxor xmm5,xmm5
+ movdqa [48+esp],xmm0
+ pxor xmm6,xmm6
+ movdqa [64+esp],xmm0
+ pxor xmm7,xmm7
+ mov esp,DWORD [80+esp]
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _aesni_xts_encrypt
+align 16
+_aesni_xts_encrypt:
+L$_aesni_xts_encrypt_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov edx,DWORD [36+esp]
+ mov esi,DWORD [40+esp]
+ mov ecx,DWORD [240+edx]
+ movups xmm2,[esi]
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ lea edx,[32+edx]
+ xorps xmm2,xmm0
+L$046enc1_loop_8:
+db 102,15,56,220,209
+ dec ecx
+ movups xmm1,[edx]
+ lea edx,[16+edx]
+ jnz NEAR L$046enc1_loop_8
+db 102,15,56,221,209
+ mov esi,DWORD [20+esp]
+ mov edi,DWORD [24+esp]
+ mov eax,DWORD [28+esp]
+ mov edx,DWORD [32+esp]
+ mov ebp,esp
+ sub esp,120
+ mov ecx,DWORD [240+edx]
+ and esp,-16
+ mov DWORD [96+esp],135
+ mov DWORD [100+esp],0
+ mov DWORD [104+esp],1
+ mov DWORD [108+esp],0
+ mov DWORD [112+esp],eax
+ mov DWORD [116+esp],ebp
+ movdqa xmm1,xmm2
+ pxor xmm0,xmm0
+ movdqa xmm3,[96+esp]
+ pcmpgtd xmm0,xmm1
+ and eax,-16
+ mov ebp,edx
+ mov ebx,ecx
+ sub eax,96
+ jc NEAR L$047xts_enc_short
+ shl ecx,4
+ mov ebx,16
+ sub ebx,ecx
+ lea edx,[32+ecx*1+edx]
+ jmp NEAR L$048xts_enc_loop6
+align 16
+L$048xts_enc_loop6:
+ pshufd xmm2,xmm0,19
+ pxor xmm0,xmm0
+ movdqa [esp],xmm1
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+ pshufd xmm2,xmm0,19
+ pxor xmm0,xmm0
+ movdqa [16+esp],xmm1
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+ pshufd xmm2,xmm0,19
+ pxor xmm0,xmm0
+ movdqa [32+esp],xmm1
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+ pshufd xmm2,xmm0,19
+ pxor xmm0,xmm0
+ movdqa [48+esp],xmm1
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+ pshufd xmm7,xmm0,19
+ movdqa [64+esp],xmm1
+ paddq xmm1,xmm1
+ movups xmm0,[ebp]
+ pand xmm7,xmm3
+ movups xmm2,[esi]
+ pxor xmm7,xmm1
+ mov ecx,ebx
+ movdqu xmm3,[16+esi]
+ xorps xmm2,xmm0
+ movdqu xmm4,[32+esi]
+ pxor xmm3,xmm0
+ movdqu xmm5,[48+esi]
+ pxor xmm4,xmm0
+ movdqu xmm6,[64+esi]
+ pxor xmm5,xmm0
+ movdqu xmm1,[80+esi]
+ pxor xmm6,xmm0
+ lea esi,[96+esi]
+ pxor xmm2,[esp]
+ movdqa [80+esp],xmm7
+ pxor xmm7,xmm1
+ movups xmm1,[16+ebp]
+ pxor xmm3,[16+esp]
+ pxor xmm4,[32+esp]
+db 102,15,56,220,209
+ pxor xmm5,[48+esp]
+ pxor xmm6,[64+esp]
+db 102,15,56,220,217
+ pxor xmm7,xmm0
+ movups xmm0,[32+ebp]
+db 102,15,56,220,225
+db 102,15,56,220,233
+db 102,15,56,220,241
+db 102,15,56,220,249
+ call L$_aesni_encrypt6_enter
+ movdqa xmm1,[80+esp]
+ pxor xmm0,xmm0
+ xorps xmm2,[esp]
+ pcmpgtd xmm0,xmm1
+ xorps xmm3,[16+esp]
+ movups [edi],xmm2
+ xorps xmm4,[32+esp]
+ movups [16+edi],xmm3
+ xorps xmm5,[48+esp]
+ movups [32+edi],xmm4
+ xorps xmm6,[64+esp]
+ movups [48+edi],xmm5
+ xorps xmm7,xmm1
+ movups [64+edi],xmm6
+ pshufd xmm2,xmm0,19
+ movups [80+edi],xmm7
+ lea edi,[96+edi]
+ movdqa xmm3,[96+esp]
+ pxor xmm0,xmm0
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+ sub eax,96
+ jnc NEAR L$048xts_enc_loop6
+ mov ecx,DWORD [240+ebp]
+ mov edx,ebp
+ mov ebx,ecx
+L$047xts_enc_short:
+ add eax,96
+ jz NEAR L$049xts_enc_done6x
+ movdqa xmm5,xmm1
+ cmp eax,32
+ jb NEAR L$050xts_enc_one
+ pshufd xmm2,xmm0,19
+ pxor xmm0,xmm0
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+ je NEAR L$051xts_enc_two
+ pshufd xmm2,xmm0,19
+ pxor xmm0,xmm0
+ movdqa xmm6,xmm1
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+ cmp eax,64
+ jb NEAR L$052xts_enc_three
+ pshufd xmm2,xmm0,19
+ pxor xmm0,xmm0
+ movdqa xmm7,xmm1
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+ movdqa [esp],xmm5
+ movdqa [16+esp],xmm6
+ je NEAR L$053xts_enc_four
+ movdqa [32+esp],xmm7
+ pshufd xmm7,xmm0,19
+ movdqa [48+esp],xmm1
+ paddq xmm1,xmm1
+ pand xmm7,xmm3
+ pxor xmm7,xmm1
+ movdqu xmm2,[esi]
+ movdqu xmm3,[16+esi]
+ movdqu xmm4,[32+esi]
+ pxor xmm2,[esp]
+ movdqu xmm5,[48+esi]
+ pxor xmm3,[16+esp]
+ movdqu xmm6,[64+esi]
+ pxor xmm4,[32+esp]
+ lea esi,[80+esi]
+ pxor xmm5,[48+esp]
+ movdqa [64+esp],xmm7
+ pxor xmm6,xmm7
+ call __aesni_encrypt6
+ movaps xmm1,[64+esp]
+ xorps xmm2,[esp]
+ xorps xmm3,[16+esp]
+ xorps xmm4,[32+esp]
+ movups [edi],xmm2
+ xorps xmm5,[48+esp]
+ movups [16+edi],xmm3
+ xorps xmm6,xmm1
+ movups [32+edi],xmm4
+ movups [48+edi],xmm5
+ movups [64+edi],xmm6
+ lea edi,[80+edi]
+ jmp NEAR L$054xts_enc_done
+align 16
+L$050xts_enc_one:
+ movups xmm2,[esi]
+ lea esi,[16+esi]
+ xorps xmm2,xmm5
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ lea edx,[32+edx]
+ xorps xmm2,xmm0
+L$055enc1_loop_9:
+db 102,15,56,220,209
+ dec ecx
+ movups xmm1,[edx]
+ lea edx,[16+edx]
+ jnz NEAR L$055enc1_loop_9
+db 102,15,56,221,209
+ xorps xmm2,xmm5
+ movups [edi],xmm2
+ lea edi,[16+edi]
+ movdqa xmm1,xmm5
+ jmp NEAR L$054xts_enc_done
+align 16
+L$051xts_enc_two:
+ movaps xmm6,xmm1
+ movups xmm2,[esi]
+ movups xmm3,[16+esi]
+ lea esi,[32+esi]
+ xorps xmm2,xmm5
+ xorps xmm3,xmm6
+ call __aesni_encrypt2
+ xorps xmm2,xmm5
+ xorps xmm3,xmm6
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ lea edi,[32+edi]
+ movdqa xmm1,xmm6
+ jmp NEAR L$054xts_enc_done
+align 16
+L$052xts_enc_three:
+ movaps xmm7,xmm1
+ movups xmm2,[esi]
+ movups xmm3,[16+esi]
+ movups xmm4,[32+esi]
+ lea esi,[48+esi]
+ xorps xmm2,xmm5
+ xorps xmm3,xmm6
+ xorps xmm4,xmm7
+ call __aesni_encrypt3
+ xorps xmm2,xmm5
+ xorps xmm3,xmm6
+ xorps xmm4,xmm7
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ movups [32+edi],xmm4
+ lea edi,[48+edi]
+ movdqa xmm1,xmm7
+ jmp NEAR L$054xts_enc_done
+align 16
+L$053xts_enc_four:
+ movaps xmm6,xmm1
+ movups xmm2,[esi]
+ movups xmm3,[16+esi]
+ movups xmm4,[32+esi]
+ xorps xmm2,[esp]
+ movups xmm5,[48+esi]
+ lea esi,[64+esi]
+ xorps xmm3,[16+esp]
+ xorps xmm4,xmm7
+ xorps xmm5,xmm6
+ call __aesni_encrypt4
+ xorps xmm2,[esp]
+ xorps xmm3,[16+esp]
+ xorps xmm4,xmm7
+ movups [edi],xmm2
+ xorps xmm5,xmm6
+ movups [16+edi],xmm3
+ movups [32+edi],xmm4
+ movups [48+edi],xmm5
+ lea edi,[64+edi]
+ movdqa xmm1,xmm6
+ jmp NEAR L$054xts_enc_done
+align 16
+L$049xts_enc_done6x:
+ mov eax,DWORD [112+esp]
+ and eax,15
+ jz NEAR L$056xts_enc_ret
+ movdqa xmm5,xmm1
+ mov DWORD [112+esp],eax
+ jmp NEAR L$057xts_enc_steal
+align 16
+L$054xts_enc_done:
+ mov eax,DWORD [112+esp]
+ pxor xmm0,xmm0
+ and eax,15
+ jz NEAR L$056xts_enc_ret
+ pcmpgtd xmm0,xmm1
+ mov DWORD [112+esp],eax
+ pshufd xmm5,xmm0,19
+ paddq xmm1,xmm1
+ pand xmm5,[96+esp]
+ pxor xmm5,xmm1
+L$057xts_enc_steal:
+ movzx ecx,BYTE [esi]
+ movzx edx,BYTE [edi-16]
+ lea esi,[1+esi]
+ mov BYTE [edi-16],cl
+ mov BYTE [edi],dl
+ lea edi,[1+edi]
+ sub eax,1
+ jnz NEAR L$057xts_enc_steal
+ sub edi,DWORD [112+esp]
+ mov edx,ebp
+ mov ecx,ebx
+ movups xmm2,[edi-16]
+ xorps xmm2,xmm5
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ lea edx,[32+edx]
+ xorps xmm2,xmm0
+L$058enc1_loop_10:
+db 102,15,56,220,209
+ dec ecx
+ movups xmm1,[edx]
+ lea edx,[16+edx]
+ jnz NEAR L$058enc1_loop_10
+db 102,15,56,221,209
+ xorps xmm2,xmm5
+ movups [edi-16],xmm2
+L$056xts_enc_ret:
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ movdqa [esp],xmm0
+ pxor xmm3,xmm3
+ movdqa [16+esp],xmm0
+ pxor xmm4,xmm4
+ movdqa [32+esp],xmm0
+ pxor xmm5,xmm5
+ movdqa [48+esp],xmm0
+ pxor xmm6,xmm6
+ movdqa [64+esp],xmm0
+ pxor xmm7,xmm7
+ movdqa [80+esp],xmm0
+ mov esp,DWORD [116+esp]
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _aesni_xts_decrypt
+align 16
+_aesni_xts_decrypt:
+L$_aesni_xts_decrypt_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov edx,DWORD [36+esp]
+ mov esi,DWORD [40+esp]
+ mov ecx,DWORD [240+edx]
+ movups xmm2,[esi]
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ lea edx,[32+edx]
+ xorps xmm2,xmm0
+L$059enc1_loop_11:
+db 102,15,56,220,209
+ dec ecx
+ movups xmm1,[edx]
+ lea edx,[16+edx]
+ jnz NEAR L$059enc1_loop_11
+db 102,15,56,221,209
+ mov esi,DWORD [20+esp]
+ mov edi,DWORD [24+esp]
+ mov eax,DWORD [28+esp]
+ mov edx,DWORD [32+esp]
+ mov ebp,esp
+ sub esp,120
+ and esp,-16
+ xor ebx,ebx
+ test eax,15
+ setnz bl
+ shl ebx,4
+ sub eax,ebx
+ mov DWORD [96+esp],135
+ mov DWORD [100+esp],0
+ mov DWORD [104+esp],1
+ mov DWORD [108+esp],0
+ mov DWORD [112+esp],eax
+ mov DWORD [116+esp],ebp
+ mov ecx,DWORD [240+edx]
+ mov ebp,edx
+ mov ebx,ecx
+ movdqa xmm1,xmm2
+ pxor xmm0,xmm0
+ movdqa xmm3,[96+esp]
+ pcmpgtd xmm0,xmm1
+ and eax,-16
+ sub eax,96
+ jc NEAR L$060xts_dec_short
+ shl ecx,4
+ mov ebx,16
+ sub ebx,ecx
+ lea edx,[32+ecx*1+edx]
+ jmp NEAR L$061xts_dec_loop6
+align 16
+L$061xts_dec_loop6:
+ pshufd xmm2,xmm0,19
+ pxor xmm0,xmm0
+ movdqa [esp],xmm1
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+ pshufd xmm2,xmm0,19
+ pxor xmm0,xmm0
+ movdqa [16+esp],xmm1
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+ pshufd xmm2,xmm0,19
+ pxor xmm0,xmm0
+ movdqa [32+esp],xmm1
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+ pshufd xmm2,xmm0,19
+ pxor xmm0,xmm0
+ movdqa [48+esp],xmm1
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+ pshufd xmm7,xmm0,19
+ movdqa [64+esp],xmm1
+ paddq xmm1,xmm1
+ movups xmm0,[ebp]
+ pand xmm7,xmm3
+ movups xmm2,[esi]
+ pxor xmm7,xmm1
+ mov ecx,ebx
+ movdqu xmm3,[16+esi]
+ xorps xmm2,xmm0
+ movdqu xmm4,[32+esi]
+ pxor xmm3,xmm0
+ movdqu xmm5,[48+esi]
+ pxor xmm4,xmm0
+ movdqu xmm6,[64+esi]
+ pxor xmm5,xmm0
+ movdqu xmm1,[80+esi]
+ pxor xmm6,xmm0
+ lea esi,[96+esi]
+ pxor xmm2,[esp]
+ movdqa [80+esp],xmm7
+ pxor xmm7,xmm1
+ movups xmm1,[16+ebp]
+ pxor xmm3,[16+esp]
+ pxor xmm4,[32+esp]
+db 102,15,56,222,209
+ pxor xmm5,[48+esp]
+ pxor xmm6,[64+esp]
+db 102,15,56,222,217
+ pxor xmm7,xmm0
+ movups xmm0,[32+ebp]
+db 102,15,56,222,225
+db 102,15,56,222,233
+db 102,15,56,222,241
+db 102,15,56,222,249
+ call L$_aesni_decrypt6_enter
+ movdqa xmm1,[80+esp]
+ pxor xmm0,xmm0
+ xorps xmm2,[esp]
+ pcmpgtd xmm0,xmm1
+ xorps xmm3,[16+esp]
+ movups [edi],xmm2
+ xorps xmm4,[32+esp]
+ movups [16+edi],xmm3
+ xorps xmm5,[48+esp]
+ movups [32+edi],xmm4
+ xorps xmm6,[64+esp]
+ movups [48+edi],xmm5
+ xorps xmm7,xmm1
+ movups [64+edi],xmm6
+ pshufd xmm2,xmm0,19
+ movups [80+edi],xmm7
+ lea edi,[96+edi]
+ movdqa xmm3,[96+esp]
+ pxor xmm0,xmm0
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+ sub eax,96
+ jnc NEAR L$061xts_dec_loop6
+ mov ecx,DWORD [240+ebp]
+ mov edx,ebp
+ mov ebx,ecx
+L$060xts_dec_short:
+ add eax,96
+ jz NEAR L$062xts_dec_done6x
+ movdqa xmm5,xmm1
+ cmp eax,32
+ jb NEAR L$063xts_dec_one
+ pshufd xmm2,xmm0,19
+ pxor xmm0,xmm0
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+ je NEAR L$064xts_dec_two
+ pshufd xmm2,xmm0,19
+ pxor xmm0,xmm0
+ movdqa xmm6,xmm1
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+ cmp eax,64
+ jb NEAR L$065xts_dec_three
+ pshufd xmm2,xmm0,19
+ pxor xmm0,xmm0
+ movdqa xmm7,xmm1
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+ movdqa [esp],xmm5
+ movdqa [16+esp],xmm6
+ je NEAR L$066xts_dec_four
+ movdqa [32+esp],xmm7
+ pshufd xmm7,xmm0,19
+ movdqa [48+esp],xmm1
+ paddq xmm1,xmm1
+ pand xmm7,xmm3
+ pxor xmm7,xmm1
+ movdqu xmm2,[esi]
+ movdqu xmm3,[16+esi]
+ movdqu xmm4,[32+esi]
+ pxor xmm2,[esp]
+ movdqu xmm5,[48+esi]
+ pxor xmm3,[16+esp]
+ movdqu xmm6,[64+esi]
+ pxor xmm4,[32+esp]
+ lea esi,[80+esi]
+ pxor xmm5,[48+esp]
+ movdqa [64+esp],xmm7
+ pxor xmm6,xmm7
+ call __aesni_decrypt6
+ movaps xmm1,[64+esp]
+ xorps xmm2,[esp]
+ xorps xmm3,[16+esp]
+ xorps xmm4,[32+esp]
+ movups [edi],xmm2
+ xorps xmm5,[48+esp]
+ movups [16+edi],xmm3
+ xorps xmm6,xmm1
+ movups [32+edi],xmm4
+ movups [48+edi],xmm5
+ movups [64+edi],xmm6
+ lea edi,[80+edi]
+ jmp NEAR L$067xts_dec_done
+align 16
+L$063xts_dec_one:
+ movups xmm2,[esi]
+ lea esi,[16+esi]
+ xorps xmm2,xmm5
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ lea edx,[32+edx]
+ xorps xmm2,xmm0
+L$068dec1_loop_12:
+db 102,15,56,222,209
+ dec ecx
+ movups xmm1,[edx]
+ lea edx,[16+edx]
+ jnz NEAR L$068dec1_loop_12
+db 102,15,56,223,209
+ xorps xmm2,xmm5
+ movups [edi],xmm2
+ lea edi,[16+edi]
+ movdqa xmm1,xmm5
+ jmp NEAR L$067xts_dec_done
+align 16
+L$064xts_dec_two:
+ movaps xmm6,xmm1
+ movups xmm2,[esi]
+ movups xmm3,[16+esi]
+ lea esi,[32+esi]
+ xorps xmm2,xmm5
+ xorps xmm3,xmm6
+ call __aesni_decrypt2
+ xorps xmm2,xmm5
+ xorps xmm3,xmm6
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ lea edi,[32+edi]
+ movdqa xmm1,xmm6
+ jmp NEAR L$067xts_dec_done
+align 16
+L$065xts_dec_three:
+ movaps xmm7,xmm1
+ movups xmm2,[esi]
+ movups xmm3,[16+esi]
+ movups xmm4,[32+esi]
+ lea esi,[48+esi]
+ xorps xmm2,xmm5
+ xorps xmm3,xmm6
+ xorps xmm4,xmm7
+ call __aesni_decrypt3
+ xorps xmm2,xmm5
+ xorps xmm3,xmm6
+ xorps xmm4,xmm7
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ movups [32+edi],xmm4
+ lea edi,[48+edi]
+ movdqa xmm1,xmm7
+ jmp NEAR L$067xts_dec_done
+align 16
+L$066xts_dec_four:
+ movaps xmm6,xmm1
+ movups xmm2,[esi]
+ movups xmm3,[16+esi]
+ movups xmm4,[32+esi]
+ xorps xmm2,[esp]
+ movups xmm5,[48+esi]
+ lea esi,[64+esi]
+ xorps xmm3,[16+esp]
+ xorps xmm4,xmm7
+ xorps xmm5,xmm6
+ call __aesni_decrypt4
+ xorps xmm2,[esp]
+ xorps xmm3,[16+esp]
+ xorps xmm4,xmm7
+ movups [edi],xmm2
+ xorps xmm5,xmm6
+ movups [16+edi],xmm3
+ movups [32+edi],xmm4
+ movups [48+edi],xmm5
+ lea edi,[64+edi]
+ movdqa xmm1,xmm6
+ jmp NEAR L$067xts_dec_done
+align 16
+L$062xts_dec_done6x:
+ mov eax,DWORD [112+esp]
+ and eax,15
+ jz NEAR L$069xts_dec_ret
+ mov DWORD [112+esp],eax
+ jmp NEAR L$070xts_dec_only_one_more
+align 16
+L$067xts_dec_done:
+ mov eax,DWORD [112+esp]
+ pxor xmm0,xmm0
+ and eax,15
+ jz NEAR L$069xts_dec_ret
+ pcmpgtd xmm0,xmm1
+ mov DWORD [112+esp],eax
+ pshufd xmm2,xmm0,19
+ pxor xmm0,xmm0
+ movdqa xmm3,[96+esp]
+ paddq xmm1,xmm1
+ pand xmm2,xmm3
+ pcmpgtd xmm0,xmm1
+ pxor xmm1,xmm2
+L$070xts_dec_only_one_more:
+ pshufd xmm5,xmm0,19
+ movdqa xmm6,xmm1
+ paddq xmm1,xmm1
+ pand xmm5,xmm3
+ pxor xmm5,xmm1
+ mov edx,ebp
+ mov ecx,ebx
+ movups xmm2,[esi]
+ xorps xmm2,xmm5
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ lea edx,[32+edx]
+ xorps xmm2,xmm0
+L$071dec1_loop_13:
+db 102,15,56,222,209
+ dec ecx
+ movups xmm1,[edx]
+ lea edx,[16+edx]
+ jnz NEAR L$071dec1_loop_13
+db 102,15,56,223,209
+ xorps xmm2,xmm5
+ movups [edi],xmm2
+L$072xts_dec_steal:
+ movzx ecx,BYTE [16+esi]
+ movzx edx,BYTE [edi]
+ lea esi,[1+esi]
+ mov BYTE [edi],cl
+ mov BYTE [16+edi],dl
+ lea edi,[1+edi]
+ sub eax,1
+ jnz NEAR L$072xts_dec_steal
+ sub edi,DWORD [112+esp]
+ mov edx,ebp
+ mov ecx,ebx
+ movups xmm2,[edi]
+ xorps xmm2,xmm6
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ lea edx,[32+edx]
+ xorps xmm2,xmm0
+L$073dec1_loop_14:
+db 102,15,56,222,209
+ dec ecx
+ movups xmm1,[edx]
+ lea edx,[16+edx]
+ jnz NEAR L$073dec1_loop_14
+db 102,15,56,223,209
+ xorps xmm2,xmm6
+ movups [edi],xmm2
+L$069xts_dec_ret:
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ movdqa [esp],xmm0
+ pxor xmm3,xmm3
+ movdqa [16+esp],xmm0
+ pxor xmm4,xmm4
+ movdqa [32+esp],xmm0
+ pxor xmm5,xmm5
+ movdqa [48+esp],xmm0
+ pxor xmm6,xmm6
+ movdqa [64+esp],xmm0
+ pxor xmm7,xmm7
+ movdqa [80+esp],xmm0
+ mov esp,DWORD [116+esp]
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _aesni_cbc_encrypt
+align 16
+_aesni_cbc_encrypt:
+L$_aesni_cbc_encrypt_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov esi,DWORD [20+esp]
+ mov ebx,esp
+ mov edi,DWORD [24+esp]
+ sub ebx,24
+ mov eax,DWORD [28+esp]
+ and ebx,-16
+ mov edx,DWORD [32+esp]
+ mov ebp,DWORD [36+esp]
+ test eax,eax
+ jz NEAR L$074cbc_abort
+ cmp DWORD [40+esp],0
+ xchg ebx,esp
+ movups xmm7,[ebp]
+ mov ecx,DWORD [240+edx]
+ mov ebp,edx
+ mov DWORD [16+esp],ebx
+ mov ebx,ecx
+ je NEAR L$075cbc_decrypt
+ movaps xmm2,xmm7
+ cmp eax,16
+ jb NEAR L$076cbc_enc_tail
+ sub eax,16
+ jmp NEAR L$077cbc_enc_loop
+align 16
+L$077cbc_enc_loop:
+ movups xmm7,[esi]
+ lea esi,[16+esi]
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ xorps xmm7,xmm0
+ lea edx,[32+edx]
+ xorps xmm2,xmm7
+L$078enc1_loop_15:
+db 102,15,56,220,209
+ dec ecx
+ movups xmm1,[edx]
+ lea edx,[16+edx]
+ jnz NEAR L$078enc1_loop_15
+db 102,15,56,221,209
+ mov ecx,ebx
+ mov edx,ebp
+ movups [edi],xmm2
+ lea edi,[16+edi]
+ sub eax,16
+ jnc NEAR L$077cbc_enc_loop
+ add eax,16
+ jnz NEAR L$076cbc_enc_tail
+ movaps xmm7,xmm2
+ pxor xmm2,xmm2
+ jmp NEAR L$079cbc_ret
+L$076cbc_enc_tail:
+ mov ecx,eax
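+; the two dd lines here encode "mov esi,esi; rep movsb" (copy the eax-byte tail into the block) and "mov esi,esi; rep stosb" (zero-pad it to 16 bytes)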
+dd 2767451785
+ mov ecx,16
+ sub ecx,eax
+ xor eax,eax
+dd 2868115081
+ lea edi,[edi-16]
+ mov ecx,ebx
+ mov esi,edi
+ mov edx,ebp
+ jmp NEAR L$077cbc_enc_loop
+align 16
+L$075cbc_decrypt:
+ cmp eax,80
+ jbe NEAR L$080cbc_dec_tail
+ movaps [esp],xmm7
+ sub eax,80
+ jmp NEAR L$081cbc_dec_loop6_enter
+align 16
+L$082cbc_dec_loop6:
+ movaps [esp],xmm0
+ movups [edi],xmm7
+ lea edi,[16+edi]
+L$081cbc_dec_loop6_enter:
+ movdqu xmm2,[esi]
+ movdqu xmm3,[16+esi]
+ movdqu xmm4,[32+esi]
+ movdqu xmm5,[48+esi]
+ movdqu xmm6,[64+esi]
+ movdqu xmm7,[80+esi]
+ call __aesni_decrypt6
+ movups xmm1,[esi]
+ movups xmm0,[16+esi]
+ xorps xmm2,[esp]
+ xorps xmm3,xmm1
+ movups xmm1,[32+esi]
+ xorps xmm4,xmm0
+ movups xmm0,[48+esi]
+ xorps xmm5,xmm1
+ movups xmm1,[64+esi]
+ xorps xmm6,xmm0
+ movups xmm0,[80+esi]
+ xorps xmm7,xmm1
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ lea esi,[96+esi]
+ movups [32+edi],xmm4
+ mov ecx,ebx
+ movups [48+edi],xmm5
+ mov edx,ebp
+ movups [64+edi],xmm6
+ lea edi,[80+edi]
+ sub eax,96
+ ja NEAR L$082cbc_dec_loop6
+ movaps xmm2,xmm7
+ movaps xmm7,xmm0
+ add eax,80
+ jle NEAR L$083cbc_dec_clear_tail_collected
+ movups [edi],xmm2
+ lea edi,[16+edi]
+L$080cbc_dec_tail:
+ movups xmm2,[esi]
+ movaps xmm6,xmm2
+ cmp eax,16
+ jbe NEAR L$084cbc_dec_one
+ movups xmm3,[16+esi]
+ movaps xmm5,xmm3
+ cmp eax,32
+ jbe NEAR L$085cbc_dec_two
+ movups xmm4,[32+esi]
+ cmp eax,48
+ jbe NEAR L$086cbc_dec_three
+ movups xmm5,[48+esi]
+ cmp eax,64
+ jbe NEAR L$087cbc_dec_four
+ movups xmm6,[64+esi]
+ movaps [esp],xmm7
+ movups xmm2,[esi]
+ xorps xmm7,xmm7
+ call __aesni_decrypt6
+ movups xmm1,[esi]
+ movups xmm0,[16+esi]
+ xorps xmm2,[esp]
+ xorps xmm3,xmm1
+ movups xmm1,[32+esi]
+ xorps xmm4,xmm0
+ movups xmm0,[48+esi]
+ xorps xmm5,xmm1
+ movups xmm7,[64+esi]
+ xorps xmm6,xmm0
+ movups [edi],xmm2
+ movups [16+edi],xmm3
+ pxor xmm3,xmm3
+ movups [32+edi],xmm4
+ pxor xmm4,xmm4
+ movups [48+edi],xmm5
+ pxor xmm5,xmm5
+ lea edi,[64+edi]
+ movaps xmm2,xmm6
+ pxor xmm6,xmm6
+ sub eax,80
+ jmp NEAR L$088cbc_dec_tail_collected
+align 16
+L$084cbc_dec_one:
+ movups xmm0,[edx]
+ movups xmm1,[16+edx]
+ lea edx,[32+edx]
+ xorps xmm2,xmm0
+L$089dec1_loop_16:
+db 102,15,56,222,209
+ dec ecx
+ movups xmm1,[edx]
+ lea edx,[16+edx]
+ jnz NEAR L$089dec1_loop_16
+db 102,15,56,223,209
+ xorps xmm2,xmm7
+ movaps xmm7,xmm6
+ sub eax,16
+ jmp NEAR L$088cbc_dec_tail_collected
+align 16
+L$085cbc_dec_two:
+ call __aesni_decrypt2
+ xorps xmm2,xmm7
+ xorps xmm3,xmm6
+ movups [edi],xmm2
+ movaps xmm2,xmm3
+ pxor xmm3,xmm3
+ lea edi,[16+edi]
+ movaps xmm7,xmm5
+ sub eax,32
+ jmp NEAR L$088cbc_dec_tail_collected
+align 16
+L$086cbc_dec_three:
+ call __aesni_decrypt3
+ xorps xmm2,xmm7
+ xorps xmm3,xmm6
+ xorps xmm4,xmm5
+ movups [edi],xmm2
+ movaps xmm2,xmm4
+ pxor xmm4,xmm4
+ movups [16+edi],xmm3
+ pxor xmm3,xmm3
+ lea edi,[32+edi]
+ movups xmm7,[32+esi]
+ sub eax,48
+ jmp NEAR L$088cbc_dec_tail_collected
+align 16
+L$087cbc_dec_four:
+ call __aesni_decrypt4
+ movups xmm1,[16+esi]
+ movups xmm0,[32+esi]
+ xorps xmm2,xmm7
+ movups xmm7,[48+esi]
+ xorps xmm3,xmm6
+ movups [edi],xmm2
+ xorps xmm4,xmm1
+ movups [16+edi],xmm3
+ pxor xmm3,xmm3
+ xorps xmm5,xmm0
+ movups [32+edi],xmm4
+ pxor xmm4,xmm4
+ lea edi,[48+edi]
+ movaps xmm2,xmm5
+ pxor xmm5,xmm5
+ sub eax,64
+ jmp NEAR L$088cbc_dec_tail_collected
+align 16
+L$083cbc_dec_clear_tail_collected:
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ pxor xmm6,xmm6
+L$088cbc_dec_tail_collected:
+ and eax,15
+ jnz NEAR L$090cbc_dec_tail_partial
+ movups [edi],xmm2
+ pxor xmm0,xmm0
+ jmp NEAR L$079cbc_ret
+align 16
+L$090cbc_dec_tail_partial:
+ movaps [esp],xmm2
+ pxor xmm0,xmm0
+ mov ecx,16
+ mov esi,esp
+ sub ecx,eax
+dd 2767451785
+ movdqa [esp],xmm2
+L$079cbc_ret:
+ mov esp,DWORD [16+esp]
+ mov ebp,DWORD [36+esp]
+ pxor xmm2,xmm2
+ pxor xmm1,xmm1
+ movups [ebp],xmm7
+ pxor xmm7,xmm7
+L$074cbc_abort:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+align 16
+__aesni_set_encrypt_key:
+ push ebp
+ push ebx
+ test eax,eax
+ jz NEAR L$091bad_pointer
+ test edx,edx
+ jz NEAR L$091bad_pointer
+ call L$092pic
+L$092pic:
+ pop ebx
+ lea ebx,[(L$key_const-L$092pic)+ebx]
+ lea ebp,[_OPENSSL_ia32cap_P]
+ movups xmm0,[eax]
+ xorps xmm4,xmm4
+ mov ebp,DWORD [4+ebp]
+ lea edx,[16+edx]
+ and ebp,268437504
+ cmp ecx,256
+ je NEAR L$09314rounds
+ cmp ecx,192
+ je NEAR L$09412rounds
+ cmp ecx,128
+ jne NEAR L$095bad_keybits
+align 16
+L$09610rounds:
+ cmp ebp,268435456
+ je NEAR L$09710rounds_alt
+ mov ecx,9
+ movups [edx-16],xmm0
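+; each "db 102,15,58,223,200,N" encodes "aeskeygenassist xmm1,xmm0,N" (the ...,202,N form is aeskeygenassist xmm1,xmm2,N), stepping through the AES round constants 1,2,4,...,27,54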
+db 102,15,58,223,200,1
+ call L$098key_128_cold
+db 102,15,58,223,200,2
+ call L$099key_128
+db 102,15,58,223,200,4
+ call L$099key_128
+db 102,15,58,223,200,8
+ call L$099key_128
+db 102,15,58,223,200,16
+ call L$099key_128
+db 102,15,58,223,200,32
+ call L$099key_128
+db 102,15,58,223,200,64
+ call L$099key_128
+db 102,15,58,223,200,128
+ call L$099key_128
+db 102,15,58,223,200,27
+ call L$099key_128
+db 102,15,58,223,200,54
+ call L$099key_128
+ movups [edx],xmm0
+ mov DWORD [80+edx],ecx
+ jmp NEAR L$100good_key
+align 16
+L$099key_128:
+ movups [edx],xmm0
+ lea edx,[16+edx]
+L$098key_128_cold:
+ shufps xmm4,xmm0,16
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ xorps xmm0,xmm4
+ shufps xmm1,xmm1,255
+ xorps xmm0,xmm1
+ ret
+align 16
+L$09710rounds_alt:
+ movdqa xmm5,[ebx]
+ mov ecx,8
+ movdqa xmm4,[32+ebx]
+ movdqa xmm2,xmm0
+ movdqu [edx-16],xmm0
+L$101loop_key128:
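+; db 102,15,56,0,197 / 102,15,56,221,196 encode "pshufb xmm0,xmm5" and "aesenclast xmm0,xmm4"; with the shuffle mask in xmm5 and the round constant in xmm4 this effectively computes SubWord(RotWord(w)) xor rcon for the 128-bit key schedule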
+db 102,15,56,0,197
+db 102,15,56,221,196
+ pslld xmm4,1
+ lea edx,[16+edx]
+ movdqa xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm2,xmm3
+ pxor xmm0,xmm2
+ movdqu [edx-16],xmm0
+ movdqa xmm2,xmm0
+ dec ecx
+ jnz NEAR L$101loop_key128
+ movdqa xmm4,[48+ebx]
+db 102,15,56,0,197
+db 102,15,56,221,196
+ pslld xmm4,1
+ movdqa xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm2,xmm3
+ pxor xmm0,xmm2
+ movdqu [edx],xmm0
+ movdqa xmm2,xmm0
+db 102,15,56,0,197
+db 102,15,56,221,196
+ movdqa xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm2,xmm3
+ pxor xmm0,xmm2
+ movdqu [16+edx],xmm0
+ mov ecx,9
+ mov DWORD [96+edx],ecx
+ jmp NEAR L$100good_key
+align 16
+L$09412rounds:
+ movq xmm2,[16+eax]
+ cmp ebp,268435456
+ je NEAR L$10212rounds_alt
+ mov ecx,11
+ movups [edx-16],xmm0
+db 102,15,58,223,202,1
+ call L$103key_192a_cold
+db 102,15,58,223,202,2
+ call L$104key_192b
+db 102,15,58,223,202,4
+ call L$105key_192a
+db 102,15,58,223,202,8
+ call L$104key_192b
+db 102,15,58,223,202,16
+ call L$105key_192a
+db 102,15,58,223,202,32
+ call L$104key_192b
+db 102,15,58,223,202,64
+ call L$105key_192a
+db 102,15,58,223,202,128
+ call L$104key_192b
+ movups [edx],xmm0
+ mov DWORD [48+edx],ecx
+ jmp NEAR L$100good_key
+align 16
+L$105key_192a:
+ movups [edx],xmm0
+ lea edx,[16+edx]
+align 16
+L$103key_192a_cold:
+ movaps xmm5,xmm2
+L$106key_192b_warm:
+ shufps xmm4,xmm0,16
+ movdqa xmm3,xmm2
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ pslldq xmm3,4
+ xorps xmm0,xmm4
+ pshufd xmm1,xmm1,85
+ pxor xmm2,xmm3
+ pxor xmm0,xmm1
+ pshufd xmm3,xmm0,255
+ pxor xmm2,xmm3
+ ret
+align 16
+L$104key_192b:
+ movaps xmm3,xmm0
+ shufps xmm5,xmm0,68
+ movups [edx],xmm5
+ shufps xmm3,xmm2,78
+ movups [16+edx],xmm3
+ lea edx,[32+edx]
+ jmp NEAR L$106key_192b_warm
+align 16
+L$10212rounds_alt:
+ movdqa xmm5,[16+ebx]
+ movdqa xmm4,[32+ebx]
+ mov ecx,8
+ movdqu [edx-16],xmm0
+L$107loop_key192:
+ movq [edx],xmm2
+ movdqa xmm1,xmm2
+db 102,15,56,0,213
+db 102,15,56,221,212
+ pslld xmm4,1
+ lea edx,[24+edx]
+ movdqa xmm3,xmm0
+ pslldq xmm0,4
+ pxor xmm3,xmm0
+ pslldq xmm0,4
+ pxor xmm3,xmm0
+ pslldq xmm0,4
+ pxor xmm0,xmm3
+ pshufd xmm3,xmm0,255
+ pxor xmm3,xmm1
+ pslldq xmm1,4
+ pxor xmm3,xmm1
+ pxor xmm0,xmm2
+ pxor xmm2,xmm3
+ movdqu [edx-16],xmm0
+ dec ecx
+ jnz NEAR L$107loop_key192
+ mov ecx,11
+ mov DWORD [32+edx],ecx
+ jmp NEAR L$100good_key
+align 16
+L$09314rounds:
+ movups xmm2,[16+eax]
+ lea edx,[16+edx]
+ cmp ebp,268435456
+ je NEAR L$10814rounds_alt
+ mov ecx,13
+ movups [edx-32],xmm0
+ movups [edx-16],xmm2
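+	; The 256-bit schedule alternates aeskeygenassist xmm1,xmm2,<imm> (...,202,<imm>)
+	; with aeskeygenassist xmm1,xmm0,<imm> (...,200,<imm>), both byte-encoded.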
+db 102,15,58,223,202,1
+ call L$109key_256a_cold
+db 102,15,58,223,200,1
+ call L$110key_256b
+db 102,15,58,223,202,2
+ call L$111key_256a
+db 102,15,58,223,200,2
+ call L$110key_256b
+db 102,15,58,223,202,4
+ call L$111key_256a
+db 102,15,58,223,200,4
+ call L$110key_256b
+db 102,15,58,223,202,8
+ call L$111key_256a
+db 102,15,58,223,200,8
+ call L$110key_256b
+db 102,15,58,223,202,16
+ call L$111key_256a
+db 102,15,58,223,200,16
+ call L$110key_256b
+db 102,15,58,223,202,32
+ call L$111key_256a
+db 102,15,58,223,200,32
+ call L$110key_256b
+db 102,15,58,223,202,64
+ call L$111key_256a
+ movups [edx],xmm0
+ mov DWORD [16+edx],ecx
+ xor eax,eax
+ jmp NEAR L$100good_key
+align 16
+L$111key_256a:
+ movups [edx],xmm2
+ lea edx,[16+edx]
+L$109key_256a_cold:
+ shufps xmm4,xmm0,16
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ xorps xmm0,xmm4
+ shufps xmm1,xmm1,255
+ xorps xmm0,xmm1
+ ret
+align 16
+L$110key_256b:
+ movups [edx],xmm0
+ lea edx,[16+edx]
+ shufps xmm4,xmm2,16
+ xorps xmm2,xmm4
+ shufps xmm4,xmm2,140
+ xorps xmm2,xmm4
+ shufps xmm1,xmm1,170
+ xorps xmm2,xmm1
+ ret
+align 16
+L$10814rounds_alt:
+ movdqa xmm5,[ebx]
+ movdqa xmm4,[32+ebx]
+ mov ecx,7
+ movdqu [edx-32],xmm0
+ movdqa xmm1,xmm2
+ movdqu [edx-16],xmm2
+L$112loop_key256:
+db 102,15,56,0,213
+db 102,15,56,221,212
+ movdqa xmm3,xmm0
+ pslldq xmm0,4
+ pxor xmm3,xmm0
+ pslldq xmm0,4
+ pxor xmm3,xmm0
+ pslldq xmm0,4
+ pxor xmm0,xmm3
+ pslld xmm4,1
+ pxor xmm0,xmm2
+ movdqu [edx],xmm0
+ dec ecx
+ jz NEAR L$113done_key256
+ pshufd xmm2,xmm0,255
+ pxor xmm3,xmm3
+db 102,15,56,221,211
+ movdqa xmm3,xmm1
+ pslldq xmm1,4
+ pxor xmm3,xmm1
+ pslldq xmm1,4
+ pxor xmm3,xmm1
+ pslldq xmm1,4
+ pxor xmm1,xmm3
+ pxor xmm2,xmm1
+ movdqu [16+edx],xmm2
+ lea edx,[32+edx]
+ movdqa xmm1,xmm2
+ jmp NEAR L$112loop_key256
+L$113done_key256:
+ mov ecx,13
+ mov DWORD [16+edx],ecx
+L$100good_key:
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ xor eax,eax
+ pop ebx
+ pop ebp
+ ret
+align 4
+L$091bad_pointer:
+ mov eax,-1
+ pop ebx
+ pop ebp
+ ret
+align 4
+L$095bad_keybits:
+ pxor xmm0,xmm0
+ mov eax,-2
+ pop ebx
+ pop ebp
+ ret
+global _aesni_set_encrypt_key
+align 16
+_aesni_set_encrypt_key:
+L$_aesni_set_encrypt_key_begin:
+ mov eax,DWORD [4+esp]
+ mov ecx,DWORD [8+esp]
+ mov edx,DWORD [12+esp]
+ call __aesni_set_encrypt_key
+ ret
+global _aesni_set_decrypt_key
+align 16
+_aesni_set_decrypt_key:
+L$_aesni_set_decrypt_key_begin:
+ mov eax,DWORD [4+esp]
+ mov ecx,DWORD [8+esp]
+ mov edx,DWORD [12+esp]
+ call __aesni_set_encrypt_key
+ mov edx,DWORD [12+esp]
+ shl ecx,4
+ test eax,eax
+ jnz NEAR L$114dec_key_ret
+ lea eax,[16+ecx*1+edx]
+ movups xmm0,[edx]
+ movups xmm1,[eax]
+ movups [eax],xmm0
+ movups [edx],xmm1
+ lea edx,[16+edx]
+ lea eax,[eax-16]
+L$115dec_key_inverse:
+ movups xmm0,[edx]
+ movups xmm1,[eax]
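+	; db 102,15,56,219,192 / db 102,15,56,219,201 = aesimc xmm0,xmm0 / aesimc xmm1,xmm1:
+	; InvMixColumns is applied while the encryption schedule is reversed in place.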
+db 102,15,56,219,192
+db 102,15,56,219,201
+ lea edx,[16+edx]
+ lea eax,[eax-16]
+ movups [16+eax],xmm0
+ movups [edx-16],xmm1
+ cmp eax,edx
+ ja NEAR L$115dec_key_inverse
+ movups xmm0,[edx]
+db 102,15,56,219,192
+ movups [edx],xmm0
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ xor eax,eax
+L$114dec_key_ret:
+ ret
+align 64
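+; L$key_const below holds the pshufb rotation masks 0x0c0f0e0d and 0x04070605 plus the
+; round-constant seeds 1 and 0x1b; the *_alt schedules above presumably address it via ebx.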
+L$key_const:
+dd 202313229,202313229,202313229,202313229
+dd 67569157,67569157,67569157,67569157
+dd 1,1,1,1
+dd 27,27,27,27
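+; The db string below spells "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>".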
+db 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+db 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+db 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+db 115,108,46,111,114,103,62,0
+segment .bss
+common _OPENSSL_ia32cap_P 16
diff --git a/win-x86/crypto/aes/vpaes-x86.asm b/win-x86/crypto/aes/vpaes-x86.asm
new file mode 100644
index 0000000..b08b056
--- /dev/null
+++ b/win-x86/crypto/aes/vpaes-x86.asm
@@ -0,0 +1,649 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+%ifdef __YASM_VERSION_ID__
+%if __YASM_VERSION_ID__ < 01010000h
+%error yasm version 1.1.0 or later needed.
+%endif
+; Yasm automatically includes .00 and complains about redefining it.
+; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
+%else
+$@feat.00 equ 1
+%endif
+section .text code align=64
+%else
+section .text code
+%endif
+align 64
+L$_vpaes_consts:
+dd 218628480,235210255,168496130,67568393
+dd 252381056,17041926,33884169,51187212
+dd 252645135,252645135,252645135,252645135
+dd 1512730624,3266504856,1377990664,3401244816
+dd 830229760,1275146365,2969422977,3447763452
+dd 3411033600,2979783055,338359620,2782886510
+dd 4209124096,907596821,221174255,1006095553
+dd 191964160,3799684038,3164090317,1589111125
+dd 182528256,1777043520,2877432650,3265356744
+dd 1874708224,3503451415,3305285752,363511674
+dd 1606117888,3487855781,1093350906,2384367825
+dd 197121,67569157,134941193,202313229
+dd 67569157,134941193,202313229,197121
+dd 134941193,202313229,197121,67569157
+dd 202313229,197121,67569157,134941193
+dd 33619971,100992007,168364043,235736079
+dd 235736079,33619971,100992007,168364043
+dd 168364043,235736079,33619971,100992007
+dd 100992007,168364043,235736079,33619971
+dd 50462976,117835012,185207048,252579084
+dd 252314880,51251460,117574920,184942860
+dd 184682752,252054788,50987272,118359308
+dd 118099200,185467140,251790600,50727180
+dd 2946363062,528716217,1300004225,1881839624
+dd 1532713819,1532713819,1532713819,1532713819
+dd 3602276352,4288629033,3737020424,4153884961
+dd 1354558464,32357713,2958822624,3775749553
+dd 1201988352,132424512,1572796698,503232858
+dd 2213177600,1597421020,4103937655,675398315
+dd 2749646592,4273543773,1511898873,121693092
+dd 3040248576,1103263732,2871565598,1608280554
+dd 2236667136,2588920351,482954393,64377734
+dd 3069987328,291237287,2117370568,3650299247
+dd 533321216,3573750986,2572112006,1401264716
+dd 1339849704,2721158661,548607111,3445553514
+dd 2128193280,3054596040,2183486460,1257083700
+dd 655635200,1165381986,3923443150,2344132524
+dd 190078720,256924420,290342170,357187870
+dd 1610966272,2263057382,4103205268,309794674
+dd 2592527872,2233205587,1335446729,3402964816
+dd 3973531904,3225098121,3002836325,1918774430
+dd 3870401024,2102906079,2284471353,4117666579
+dd 617007872,1021508343,366931923,691083277
+dd 2528395776,3491914898,2968704004,1613121270
+dd 3445188352,3247741094,844474987,4093578302
+dd 651481088,1190302358,1689581232,574775300
+dd 4289380608,206939853,2555985458,2489840491
+dd 2130264064,327674451,3566485037,3349835193
+dd 2470714624,316102159,3636825756,3393945945
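+; The db string below spells "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)".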
+db 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
+db 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
+db 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
+db 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
+db 118,101,114,115,105,116,121,41,0
+align 64
+align 16
+__vpaes_preheat:
+ add ebp,DWORD [esp]
+ movdqa xmm7,[ebp-48]
+ movdqa xmm6,[ebp-16]
+ ret
+align 16
+__vpaes_encrypt_core:
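+	; Throughout this file the db 102,15,56,0,<modrm> lines encode pshufb; vpaes carries out
+	; the S-box and MixColumns work with such byte shuffles rather than data-dependent table lookups.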
+ mov ecx,16
+ mov eax,DWORD [240+edx]
+ movdqa xmm1,xmm6
+ movdqa xmm2,[ebp]
+ pandn xmm1,xmm0
+ pand xmm0,xmm6
+ movdqu xmm5,[edx]
+db 102,15,56,0,208
+ movdqa xmm0,[16+ebp]
+ pxor xmm2,xmm5
+ psrld xmm1,4
+ add edx,16
+db 102,15,56,0,193
+ lea ebx,[192+ebp]
+ pxor xmm0,xmm2
+ jmp NEAR L$000enc_entry
+align 16
+L$001enc_loop:
+ movdqa xmm4,[32+ebp]
+ movdqa xmm0,[48+ebp]
+db 102,15,56,0,226
+db 102,15,56,0,195
+ pxor xmm4,xmm5
+ movdqa xmm5,[64+ebp]
+ pxor xmm0,xmm4
+ movdqa xmm1,[ecx*1+ebx-64]
+db 102,15,56,0,234
+ movdqa xmm2,[80+ebp]
+ movdqa xmm4,[ecx*1+ebx]
+db 102,15,56,0,211
+ movdqa xmm3,xmm0
+ pxor xmm2,xmm5
+db 102,15,56,0,193
+ add edx,16
+ pxor xmm0,xmm2
+db 102,15,56,0,220
+ add ecx,16
+ pxor xmm3,xmm0
+db 102,15,56,0,193
+ and ecx,48
+ sub eax,1
+ pxor xmm0,xmm3
+L$000enc_entry:
+ movdqa xmm1,xmm6
+ movdqa xmm5,[ebp-32]
+ pandn xmm1,xmm0
+ psrld xmm1,4
+ pand xmm0,xmm6
+db 102,15,56,0,232
+ movdqa xmm3,xmm7
+ pxor xmm0,xmm1
+db 102,15,56,0,217
+ movdqa xmm4,xmm7
+ pxor xmm3,xmm5
+db 102,15,56,0,224
+ movdqa xmm2,xmm7
+ pxor xmm4,xmm5
+db 102,15,56,0,211
+ movdqa xmm3,xmm7
+ pxor xmm2,xmm0
+db 102,15,56,0,220
+ movdqu xmm5,[edx]
+ pxor xmm3,xmm1
+ jnz NEAR L$001enc_loop
+ movdqa xmm4,[96+ebp]
+ movdqa xmm0,[112+ebp]
+db 102,15,56,0,226
+ pxor xmm4,xmm5
+db 102,15,56,0,195
+ movdqa xmm1,[64+ecx*1+ebx]
+ pxor xmm0,xmm4
+db 102,15,56,0,193
+ ret
+align 16
+__vpaes_decrypt_core:
+ lea ebx,[608+ebp]
+ mov eax,DWORD [240+edx]
+ movdqa xmm1,xmm6
+ movdqa xmm2,[ebx-64]
+ pandn xmm1,xmm0
+ mov ecx,eax
+ psrld xmm1,4
+ movdqu xmm5,[edx]
+ shl ecx,4
+ pand xmm0,xmm6
+db 102,15,56,0,208
+ movdqa xmm0,[ebx-48]
+ xor ecx,48
+db 102,15,56,0,193
+ and ecx,48
+ pxor xmm2,xmm5
+ movdqa xmm5,[176+ebp]
+ pxor xmm0,xmm2
+ add edx,16
+ lea ecx,[ecx*1+ebx-352]
+ jmp NEAR L$002dec_entry
+align 16
+L$003dec_loop:
+ movdqa xmm4,[ebx-32]
+ movdqa xmm1,[ebx-16]
+db 102,15,56,0,226
+db 102,15,56,0,203
+ pxor xmm0,xmm4
+ movdqa xmm4,[ebx]
+ pxor xmm0,xmm1
+ movdqa xmm1,[16+ebx]
+db 102,15,56,0,226
+db 102,15,56,0,197
+db 102,15,56,0,203
+ pxor xmm0,xmm4
+ movdqa xmm4,[32+ebx]
+ pxor xmm0,xmm1
+ movdqa xmm1,[48+ebx]
+db 102,15,56,0,226
+db 102,15,56,0,197
+db 102,15,56,0,203
+ pxor xmm0,xmm4
+ movdqa xmm4,[64+ebx]
+ pxor xmm0,xmm1
+ movdqa xmm1,[80+ebx]
+db 102,15,56,0,226
+db 102,15,56,0,197
+db 102,15,56,0,203
+ pxor xmm0,xmm4
+ add edx,16
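+	; db 102,15,58,15,237,12 = palignr xmm5,xmm5,12, byte-encoded like the pshufb ops above.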
+db 102,15,58,15,237,12
+ pxor xmm0,xmm1
+ sub eax,1
+L$002dec_entry:
+ movdqa xmm1,xmm6
+ movdqa xmm2,[ebp-32]
+ pandn xmm1,xmm0
+ pand xmm0,xmm6
+ psrld xmm1,4
+db 102,15,56,0,208
+ movdqa xmm3,xmm7
+ pxor xmm0,xmm1
+db 102,15,56,0,217
+ movdqa xmm4,xmm7
+ pxor xmm3,xmm2
+db 102,15,56,0,224
+ pxor xmm4,xmm2
+ movdqa xmm2,xmm7
+db 102,15,56,0,211
+ movdqa xmm3,xmm7
+ pxor xmm2,xmm0
+db 102,15,56,0,220
+ movdqu xmm0,[edx]
+ pxor xmm3,xmm1
+ jnz NEAR L$003dec_loop
+ movdqa xmm4,[96+ebx]
+db 102,15,56,0,226
+ pxor xmm4,xmm0
+ movdqa xmm0,[112+ebx]
+ movdqa xmm2,[ecx]
+db 102,15,56,0,195
+ pxor xmm0,xmm4
+db 102,15,56,0,194
+ ret
+align 16
+__vpaes_schedule_core:
+ add ebp,DWORD [esp]
+ movdqu xmm0,[esi]
+ movdqa xmm2,[320+ebp]
+ movdqa xmm3,xmm0
+ lea ebx,[ebp]
+ movdqa [4+esp],xmm2
+ call __vpaes_schedule_transform
+ movdqa xmm7,xmm0
+ test edi,edi
+ jnz NEAR L$004schedule_am_decrypting
+ movdqu [edx],xmm0
+ jmp NEAR L$005schedule_go
+L$004schedule_am_decrypting:
+ movdqa xmm1,[256+ecx*1+ebp]
+db 102,15,56,0,217
+ movdqu [edx],xmm3
+ xor ecx,48
+L$005schedule_go:
+ cmp eax,192
+ ja NEAR L$006schedule_256
+ je NEAR L$007schedule_192
+L$008schedule_128:
+ mov eax,10
+L$009loop_schedule_128:
+ call __vpaes_schedule_round
+ dec eax
+ jz NEAR L$010schedule_mangle_last
+ call __vpaes_schedule_mangle
+ jmp NEAR L$009loop_schedule_128
+align 16
+L$007schedule_192:
+ movdqu xmm0,[8+esi]
+ call __vpaes_schedule_transform
+ movdqa xmm6,xmm0
+ pxor xmm4,xmm4
+ movhlps xmm6,xmm4
+ mov eax,4
+L$011loop_schedule_192:
+ call __vpaes_schedule_round
+db 102,15,58,15,198,8
+ call __vpaes_schedule_mangle
+ call __vpaes_schedule_192_smear
+ call __vpaes_schedule_mangle
+ call __vpaes_schedule_round
+ dec eax
+ jz NEAR L$010schedule_mangle_last
+ call __vpaes_schedule_mangle
+ call __vpaes_schedule_192_smear
+ jmp NEAR L$011loop_schedule_192
+align 16
+L$006schedule_256:
+ movdqu xmm0,[16+esi]
+ call __vpaes_schedule_transform
+ mov eax,7
+L$012loop_schedule_256:
+ call __vpaes_schedule_mangle
+ movdqa xmm6,xmm0
+ call __vpaes_schedule_round
+ dec eax
+ jz NEAR L$010schedule_mangle_last
+ call __vpaes_schedule_mangle
+ pshufd xmm0,xmm0,255
+ movdqa [20+esp],xmm7
+ movdqa xmm7,xmm6
+ call L$_vpaes_schedule_low_round
+ movdqa xmm7,[20+esp]
+ jmp NEAR L$012loop_schedule_256
+align 16
+L$010schedule_mangle_last:
+ lea ebx,[384+ebp]
+ test edi,edi
+ jnz NEAR L$013schedule_mangle_last_dec
+ movdqa xmm1,[256+ecx*1+ebp]
+db 102,15,56,0,193
+ lea ebx,[352+ebp]
+ add edx,32
+L$013schedule_mangle_last_dec:
+ add edx,-16
+ pxor xmm0,[336+ebp]
+ call __vpaes_schedule_transform
+ movdqu [edx],xmm0
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ pxor xmm6,xmm6
+ pxor xmm7,xmm7
+ ret
+align 16
+__vpaes_schedule_192_smear:
+ pshufd xmm1,xmm6,128
+ pshufd xmm0,xmm7,254
+ pxor xmm6,xmm1
+ pxor xmm1,xmm1
+ pxor xmm6,xmm0
+ movdqa xmm0,xmm6
+ movhlps xmm6,xmm1
+ ret
+align 16
+__vpaes_schedule_round:
+ movdqa xmm2,[8+esp]
+ pxor xmm1,xmm1
+db 102,15,58,15,202,15
+db 102,15,58,15,210,15
+ pxor xmm7,xmm1
+ pshufd xmm0,xmm0,255
+db 102,15,58,15,192,1
+ movdqa [8+esp],xmm2
+L$_vpaes_schedule_low_round:
+ movdqa xmm1,xmm7
+ pslldq xmm7,4
+ pxor xmm7,xmm1
+ movdqa xmm1,xmm7
+ pslldq xmm7,8
+ pxor xmm7,xmm1
+ pxor xmm7,[336+ebp]
+ movdqa xmm4,[ebp-16]
+ movdqa xmm5,[ebp-48]
+ movdqa xmm1,xmm4
+ pandn xmm1,xmm0
+ psrld xmm1,4
+ pand xmm0,xmm4
+ movdqa xmm2,[ebp-32]
+db 102,15,56,0,208
+ pxor xmm0,xmm1
+ movdqa xmm3,xmm5
+db 102,15,56,0,217
+ pxor xmm3,xmm2
+ movdqa xmm4,xmm5
+db 102,15,56,0,224
+ pxor xmm4,xmm2
+ movdqa xmm2,xmm5
+db 102,15,56,0,211
+ pxor xmm2,xmm0
+ movdqa xmm3,xmm5
+db 102,15,56,0,220
+ pxor xmm3,xmm1
+ movdqa xmm4,[32+ebp]
+db 102,15,56,0,226
+ movdqa xmm0,[48+ebp]
+db 102,15,56,0,195
+ pxor xmm0,xmm4
+ pxor xmm0,xmm7
+ movdqa xmm7,xmm0
+ ret
+align 16
+__vpaes_schedule_transform:
+ movdqa xmm2,[ebp-16]
+ movdqa xmm1,xmm2
+ pandn xmm1,xmm0
+ psrld xmm1,4
+ pand xmm0,xmm2
+ movdqa xmm2,[ebx]
+db 102,15,56,0,208
+ movdqa xmm0,[16+ebx]
+db 102,15,56,0,193
+ pxor xmm0,xmm2
+ ret
+align 16
+__vpaes_schedule_mangle:
+ movdqa xmm4,xmm0
+ movdqa xmm5,[128+ebp]
+ test edi,edi
+ jnz NEAR L$014schedule_mangle_dec
+ add edx,16
+ pxor xmm4,[336+ebp]
+db 102,15,56,0,229
+ movdqa xmm3,xmm4
+db 102,15,56,0,229
+ pxor xmm3,xmm4
+db 102,15,56,0,229
+ pxor xmm3,xmm4
+ jmp NEAR L$015schedule_mangle_both
+align 16
+L$014schedule_mangle_dec:
+ movdqa xmm2,[ebp-16]
+ lea esi,[416+ebp]
+ movdqa xmm1,xmm2
+ pandn xmm1,xmm4
+ psrld xmm1,4
+ pand xmm4,xmm2
+ movdqa xmm2,[esi]
+db 102,15,56,0,212
+ movdqa xmm3,[16+esi]
+db 102,15,56,0,217
+ pxor xmm3,xmm2
+db 102,15,56,0,221
+ movdqa xmm2,[32+esi]
+db 102,15,56,0,212
+ pxor xmm2,xmm3
+ movdqa xmm3,[48+esi]
+db 102,15,56,0,217
+ pxor xmm3,xmm2
+db 102,15,56,0,221
+ movdqa xmm2,[64+esi]
+db 102,15,56,0,212
+ pxor xmm2,xmm3
+ movdqa xmm3,[80+esi]
+db 102,15,56,0,217
+ pxor xmm3,xmm2
+db 102,15,56,0,221
+ movdqa xmm2,[96+esi]
+db 102,15,56,0,212
+ pxor xmm2,xmm3
+ movdqa xmm3,[112+esi]
+db 102,15,56,0,217
+ pxor xmm3,xmm2
+ add edx,-16
+L$015schedule_mangle_both:
+ movdqa xmm1,[256+ecx*1+ebp]
+db 102,15,56,0,217
+ add ecx,-16
+ and ecx,48
+ movdqu [edx],xmm3
+ ret
+global _vpaes_set_encrypt_key
+align 16
+_vpaes_set_encrypt_key:
+L$_vpaes_set_encrypt_key_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov esi,DWORD [20+esp]
+ lea ebx,[esp-56]
+ mov eax,DWORD [24+esp]
+ and ebx,-16
+ mov edx,DWORD [28+esp]
+ xchg ebx,esp
+ mov DWORD [48+esp],ebx
+ mov ebx,eax
+ shr ebx,5
+ add ebx,5
+ mov DWORD [240+edx],ebx
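+	; Round count is derived as (bits>>5)+5, i.e. 10/12/14 for 128/192/256-bit keys,
+	; and stored at offset 240 of the key schedule.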
+ mov ecx,48
+ mov edi,0
+ lea ebp,[(L$_vpaes_consts+0x30-L$016pic_point)]
+ call __vpaes_schedule_core
+L$016pic_point:
+ mov esp,DWORD [48+esp]
+ xor eax,eax
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _vpaes_set_decrypt_key
+align 16
+_vpaes_set_decrypt_key:
+L$_vpaes_set_decrypt_key_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov esi,DWORD [20+esp]
+ lea ebx,[esp-56]
+ mov eax,DWORD [24+esp]
+ and ebx,-16
+ mov edx,DWORD [28+esp]
+ xchg ebx,esp
+ mov DWORD [48+esp],ebx
+ mov ebx,eax
+ shr ebx,5
+ add ebx,5
+ mov DWORD [240+edx],ebx
+ shl ebx,4
+ lea edx,[16+ebx*1+edx]
+ mov edi,1
+ mov ecx,eax
+ shr ecx,1
+ and ecx,32
+ xor ecx,32
+ lea ebp,[(L$_vpaes_consts+0x30-L$017pic_point)]
+ call __vpaes_schedule_core
+L$017pic_point:
+ mov esp,DWORD [48+esp]
+ xor eax,eax
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _vpaes_encrypt
+align 16
+_vpaes_encrypt:
+L$_vpaes_encrypt_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ lea ebp,[(L$_vpaes_consts+0x30-L$018pic_point)]
+ call __vpaes_preheat
+L$018pic_point:
+ mov esi,DWORD [20+esp]
+ lea ebx,[esp-56]
+ mov edi,DWORD [24+esp]
+ and ebx,-16
+ mov edx,DWORD [28+esp]
+ xchg ebx,esp
+ mov DWORD [48+esp],ebx
+ movdqu xmm0,[esi]
+ call __vpaes_encrypt_core
+ movdqu [edi],xmm0
+ mov esp,DWORD [48+esp]
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _vpaes_decrypt
+align 16
+_vpaes_decrypt:
+L$_vpaes_decrypt_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ lea ebp,[(L$_vpaes_consts+0x30-L$019pic_point)]
+ call __vpaes_preheat
+L$019pic_point:
+ mov esi,DWORD [20+esp]
+ lea ebx,[esp-56]
+ mov edi,DWORD [24+esp]
+ and ebx,-16
+ mov edx,DWORD [28+esp]
+ xchg ebx,esp
+ mov DWORD [48+esp],ebx
+ movdqu xmm0,[esi]
+ call __vpaes_decrypt_core
+ movdqu [edi],xmm0
+ mov esp,DWORD [48+esp]
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _vpaes_cbc_encrypt
+align 16
+_vpaes_cbc_encrypt:
+L$_vpaes_cbc_encrypt_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov esi,DWORD [20+esp]
+ mov edi,DWORD [24+esp]
+ mov eax,DWORD [28+esp]
+ mov edx,DWORD [32+esp]
+ sub eax,16
+ jc NEAR L$020cbc_abort
+ lea ebx,[esp-56]
+ mov ebp,DWORD [36+esp]
+ and ebx,-16
+ mov ecx,DWORD [40+esp]
+ xchg ebx,esp
+ movdqu xmm1,[ebp]
+ sub edi,esi
+ mov DWORD [48+esp],ebx
+ mov DWORD [esp],edi
+ mov DWORD [4+esp],edx
+ mov DWORD [8+esp],ebp
+ mov edi,eax
+ lea ebp,[(L$_vpaes_consts+0x30-L$021pic_point)]
+ call __vpaes_preheat
+L$021pic_point:
+ cmp ecx,0
+ je NEAR L$022cbc_dec_loop
+ jmp NEAR L$023cbc_enc_loop
+align 16
+L$023cbc_enc_loop:
+ movdqu xmm0,[esi]
+ pxor xmm0,xmm1
+ call __vpaes_encrypt_core
+ mov ebx,DWORD [esp]
+ mov edx,DWORD [4+esp]
+ movdqa xmm1,xmm0
+ movdqu [esi*1+ebx],xmm0
+ lea esi,[16+esi]
+ sub edi,16
+ jnc NEAR L$023cbc_enc_loop
+ jmp NEAR L$024cbc_done
+align 16
+L$022cbc_dec_loop:
+ movdqu xmm0,[esi]
+ movdqa [16+esp],xmm1
+ movdqa [32+esp],xmm0
+ call __vpaes_decrypt_core
+ mov ebx,DWORD [esp]
+ mov edx,DWORD [4+esp]
+ pxor xmm0,[16+esp]
+ movdqa xmm1,[32+esp]
+ movdqu [esi*1+ebx],xmm0
+ lea esi,[16+esi]
+ sub edi,16
+ jnc NEAR L$022cbc_dec_loop
+L$024cbc_done:
+ mov ebx,DWORD [8+esp]
+ mov esp,DWORD [48+esp]
+ movdqu [ebx],xmm1
+L$020cbc_abort:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
diff --git a/win-x86/crypto/bn/bn-586.asm b/win-x86/crypto/bn/bn-586.asm
new file mode 100644
index 0000000..b222040
--- /dev/null
+++ b/win-x86/crypto/bn/bn-586.asm
@@ -0,0 +1,1523 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+%ifdef __YASM_VERSION_ID__
+%if __YASM_VERSION_ID__ < 01010000h
+%error yasm version 1.1.0 or later needed.
+%endif
+; Yasm automatically includes .00 and complains about redefining it.
+; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
+%else
+$@feat.00 equ 1
+%endif
+section .text code align=64
+%else
+section .text code
+%endif
+;extern _OPENSSL_ia32cap_P
+global _bn_mul_add_words
+align 16
+_bn_mul_add_words:
+L$_bn_mul_add_words_begin:
+ lea eax,[_OPENSSL_ia32cap_P]
+ bt DWORD [eax],26
+ jnc NEAR L$000maw_non_sse2
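+	; Bit 26 of OPENSSL_ia32cap_P is the CPUID SSE2 flag; when it is set, the
+	; pmuludq-based path below is taken instead of the plain integer loop.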
+ mov eax,DWORD [4+esp]
+ mov edx,DWORD [8+esp]
+ mov ecx,DWORD [12+esp]
+ movd mm0,DWORD [16+esp]
+ pxor mm1,mm1
+ jmp NEAR L$001maw_sse2_entry
+align 16
+L$002maw_sse2_unrolled:
+ movd mm3,DWORD [eax]
+ paddq mm1,mm3
+ movd mm2,DWORD [edx]
+ pmuludq mm2,mm0
+ movd mm4,DWORD [4+edx]
+ pmuludq mm4,mm0
+ movd mm6,DWORD [8+edx]
+ pmuludq mm6,mm0
+ movd mm7,DWORD [12+edx]
+ pmuludq mm7,mm0
+ paddq mm1,mm2
+ movd mm3,DWORD [4+eax]
+ paddq mm3,mm4
+ movd mm5,DWORD [8+eax]
+ paddq mm5,mm6
+ movd mm4,DWORD [12+eax]
+ paddq mm7,mm4
+ movd DWORD [eax],mm1
+ movd mm2,DWORD [16+edx]
+ pmuludq mm2,mm0
+ psrlq mm1,32
+ movd mm4,DWORD [20+edx]
+ pmuludq mm4,mm0
+ paddq mm1,mm3
+ movd mm6,DWORD [24+edx]
+ pmuludq mm6,mm0
+ movd DWORD [4+eax],mm1
+ psrlq mm1,32
+ movd mm3,DWORD [28+edx]
+ add edx,32
+ pmuludq mm3,mm0
+ paddq mm1,mm5
+ movd mm5,DWORD [16+eax]
+ paddq mm2,mm5
+ movd DWORD [8+eax],mm1
+ psrlq mm1,32
+ paddq mm1,mm7
+ movd mm5,DWORD [20+eax]
+ paddq mm4,mm5
+ movd DWORD [12+eax],mm1
+ psrlq mm1,32
+ paddq mm1,mm2
+ movd mm5,DWORD [24+eax]
+ paddq mm6,mm5
+ movd DWORD [16+eax],mm1
+ psrlq mm1,32
+ paddq mm1,mm4
+ movd mm5,DWORD [28+eax]
+ paddq mm3,mm5
+ movd DWORD [20+eax],mm1
+ psrlq mm1,32
+ paddq mm1,mm6
+ movd DWORD [24+eax],mm1
+ psrlq mm1,32
+ paddq mm1,mm3
+ movd DWORD [28+eax],mm1
+ lea eax,[32+eax]
+ psrlq mm1,32
+ sub ecx,8
+ jz NEAR L$003maw_sse2_exit
+L$001maw_sse2_entry:
+ test ecx,4294967288
+ jnz NEAR L$002maw_sse2_unrolled
+align 4
+L$004maw_sse2_loop:
+ movd mm2,DWORD [edx]
+ movd mm3,DWORD [eax]
+ pmuludq mm2,mm0
+ lea edx,[4+edx]
+ paddq mm1,mm3
+ paddq mm1,mm2
+ movd DWORD [eax],mm1
+ sub ecx,1
+ psrlq mm1,32
+ lea eax,[4+eax]
+ jnz NEAR L$004maw_sse2_loop
+L$003maw_sse2_exit:
+ movd eax,mm1
+ emms
+ ret
+align 16
+L$000maw_non_sse2:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ xor esi,esi
+ mov edi,DWORD [20+esp]
+ mov ecx,DWORD [28+esp]
+ mov ebx,DWORD [24+esp]
+ and ecx,4294967288
+ mov ebp,DWORD [32+esp]
+ push ecx
+ jz NEAR L$005maw_finish
+align 16
+L$006maw_loop:
+ ; Round 0
+ mov eax,DWORD [ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [edi]
+ adc edx,0
+ mov DWORD [edi],eax
+ mov esi,edx
+ ; Round 4
+ mov eax,DWORD [4+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [4+edi]
+ adc edx,0
+ mov DWORD [4+edi],eax
+ mov esi,edx
+ ; Round 8
+ mov eax,DWORD [8+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [8+edi]
+ adc edx,0
+ mov DWORD [8+edi],eax
+ mov esi,edx
+ ; Round 12
+ mov eax,DWORD [12+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [12+edi]
+ adc edx,0
+ mov DWORD [12+edi],eax
+ mov esi,edx
+ ; Round 16
+ mov eax,DWORD [16+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [16+edi]
+ adc edx,0
+ mov DWORD [16+edi],eax
+ mov esi,edx
+ ; Round 20
+ mov eax,DWORD [20+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [20+edi]
+ adc edx,0
+ mov DWORD [20+edi],eax
+ mov esi,edx
+ ; Round 24
+ mov eax,DWORD [24+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [24+edi]
+ adc edx,0
+ mov DWORD [24+edi],eax
+ mov esi,edx
+ ; Round 28
+ mov eax,DWORD [28+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [28+edi]
+ adc edx,0
+ mov DWORD [28+edi],eax
+ mov esi,edx
+ ;
+ sub ecx,8
+ lea ebx,[32+ebx]
+ lea edi,[32+edi]
+ jnz NEAR L$006maw_loop
+L$005maw_finish:
+ mov ecx,DWORD [32+esp]
+ and ecx,7
+ jnz NEAR L$007maw_finish2
+ jmp NEAR L$008maw_end
+L$007maw_finish2:
+ ; Tail Round 0
+ mov eax,DWORD [ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 1
+ mov eax,DWORD [4+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [4+edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [4+edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 2
+ mov eax,DWORD [8+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [8+edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [8+edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 3
+ mov eax,DWORD [12+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [12+edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [12+edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 4
+ mov eax,DWORD [16+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [16+edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [16+edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 5
+ mov eax,DWORD [20+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [20+edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [20+edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 6
+ mov eax,DWORD [24+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [24+edi]
+ adc edx,0
+ mov DWORD [24+edi],eax
+ mov esi,edx
+L$008maw_end:
+ mov eax,esi
+ pop ecx
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _bn_mul_words
+align 16
+_bn_mul_words:
+L$_bn_mul_words_begin:
+ lea eax,[_OPENSSL_ia32cap_P]
+ bt DWORD [eax],26
+ jnc NEAR L$009mw_non_sse2
+ mov eax,DWORD [4+esp]
+ mov edx,DWORD [8+esp]
+ mov ecx,DWORD [12+esp]
+ movd mm0,DWORD [16+esp]
+ pxor mm1,mm1
+align 16
+L$010mw_sse2_loop:
+ movd mm2,DWORD [edx]
+ pmuludq mm2,mm0
+ lea edx,[4+edx]
+ paddq mm1,mm2
+ movd DWORD [eax],mm1
+ sub ecx,1
+ psrlq mm1,32
+ lea eax,[4+eax]
+ jnz NEAR L$010mw_sse2_loop
+ movd eax,mm1
+ emms
+ ret
+align 16
+L$009mw_non_sse2:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ xor esi,esi
+ mov edi,DWORD [20+esp]
+ mov ebx,DWORD [24+esp]
+ mov ebp,DWORD [28+esp]
+ mov ecx,DWORD [32+esp]
+ and ebp,4294967288
+ jz NEAR L$011mw_finish
+L$012mw_loop:
+ ; Round 0
+ mov eax,DWORD [ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [edi],eax
+ mov esi,edx
+ ; Round 4
+ mov eax,DWORD [4+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [4+edi],eax
+ mov esi,edx
+ ; Round 8
+ mov eax,DWORD [8+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [8+edi],eax
+ mov esi,edx
+ ; Round 12
+ mov eax,DWORD [12+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [12+edi],eax
+ mov esi,edx
+ ; Round 16
+ mov eax,DWORD [16+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [16+edi],eax
+ mov esi,edx
+ ; Round 20
+ mov eax,DWORD [20+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [20+edi],eax
+ mov esi,edx
+ ; Round 24
+ mov eax,DWORD [24+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [24+edi],eax
+ mov esi,edx
+ ; Round 28
+ mov eax,DWORD [28+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [28+edi],eax
+ mov esi,edx
+ ;
+ add ebx,32
+ add edi,32
+ sub ebp,8
+ jz NEAR L$011mw_finish
+ jmp NEAR L$012mw_loop
+L$011mw_finish:
+ mov ebp,DWORD [28+esp]
+ and ebp,7
+ jnz NEAR L$013mw_finish2
+ jmp NEAR L$014mw_end
+L$013mw_finish2:
+ ; Tail Round 0
+ mov eax,DWORD [ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 1
+ mov eax,DWORD [4+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [4+edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 2
+ mov eax,DWORD [8+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [8+edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 3
+ mov eax,DWORD [12+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [12+edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 4
+ mov eax,DWORD [16+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [16+edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 5
+ mov eax,DWORD [20+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [20+edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 6
+ mov eax,DWORD [24+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [24+edi],eax
+ mov esi,edx
+L$014mw_end:
+ mov eax,esi
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _bn_sqr_words
+align 16
+_bn_sqr_words:
+L$_bn_sqr_words_begin:
+ lea eax,[_OPENSSL_ia32cap_P]
+ bt DWORD [eax],26
+ jnc NEAR L$015sqr_non_sse2
+ mov eax,DWORD [4+esp]
+ mov edx,DWORD [8+esp]
+ mov ecx,DWORD [12+esp]
+align 16
+L$016sqr_sse2_loop:
+ movd mm0,DWORD [edx]
+ pmuludq mm0,mm0
+ lea edx,[4+edx]
+ movq [eax],mm0
+ sub ecx,1
+ lea eax,[8+eax]
+ jnz NEAR L$016sqr_sse2_loop
+ emms
+ ret
+align 16
+L$015sqr_non_sse2:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ mov esi,DWORD [20+esp]
+ mov edi,DWORD [24+esp]
+ mov ebx,DWORD [28+esp]
+ and ebx,4294967288
+ jz NEAR L$017sw_finish
+L$018sw_loop:
+ ; Round 0
+ mov eax,DWORD [edi]
+ mul eax
+ mov DWORD [esi],eax
+ mov DWORD [4+esi],edx
+ ; Round 4
+ mov eax,DWORD [4+edi]
+ mul eax
+ mov DWORD [8+esi],eax
+ mov DWORD [12+esi],edx
+ ; Round 8
+ mov eax,DWORD [8+edi]
+ mul eax
+ mov DWORD [16+esi],eax
+ mov DWORD [20+esi],edx
+ ; Round 12
+ mov eax,DWORD [12+edi]
+ mul eax
+ mov DWORD [24+esi],eax
+ mov DWORD [28+esi],edx
+ ; Round 16
+ mov eax,DWORD [16+edi]
+ mul eax
+ mov DWORD [32+esi],eax
+ mov DWORD [36+esi],edx
+ ; Round 20
+ mov eax,DWORD [20+edi]
+ mul eax
+ mov DWORD [40+esi],eax
+ mov DWORD [44+esi],edx
+ ; Round 24
+ mov eax,DWORD [24+edi]
+ mul eax
+ mov DWORD [48+esi],eax
+ mov DWORD [52+esi],edx
+ ; Round 28
+ mov eax,DWORD [28+edi]
+ mul eax
+ mov DWORD [56+esi],eax
+ mov DWORD [60+esi],edx
+ ;
+ add edi,32
+ add esi,64
+ sub ebx,8
+ jnz NEAR L$018sw_loop
+L$017sw_finish:
+ mov ebx,DWORD [28+esp]
+ and ebx,7
+ jz NEAR L$019sw_end
+ ; Tail Round 0
+ mov eax,DWORD [edi]
+ mul eax
+ mov DWORD [esi],eax
+ dec ebx
+ mov DWORD [4+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 1
+ mov eax,DWORD [4+edi]
+ mul eax
+ mov DWORD [8+esi],eax
+ dec ebx
+ mov DWORD [12+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 2
+ mov eax,DWORD [8+edi]
+ mul eax
+ mov DWORD [16+esi],eax
+ dec ebx
+ mov DWORD [20+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 3
+ mov eax,DWORD [12+edi]
+ mul eax
+ mov DWORD [24+esi],eax
+ dec ebx
+ mov DWORD [28+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 4
+ mov eax,DWORD [16+edi]
+ mul eax
+ mov DWORD [32+esi],eax
+ dec ebx
+ mov DWORD [36+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 5
+ mov eax,DWORD [20+edi]
+ mul eax
+ mov DWORD [40+esi],eax
+ dec ebx
+ mov DWORD [44+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 6
+ mov eax,DWORD [24+edi]
+ mul eax
+ mov DWORD [48+esi],eax
+ mov DWORD [52+esi],edx
+L$019sw_end:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _bn_div_words
+align 16
+_bn_div_words:
+L$_bn_div_words_begin:
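+	; Divides the 64-bit value edx:eax (built from the first two stack arguments) by ecx;
+	; div leaves the quotient in eax, which is the return value.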
+ mov edx,DWORD [4+esp]
+ mov eax,DWORD [8+esp]
+ mov ecx,DWORD [12+esp]
+ div ecx
+ ret
+global _bn_add_words
+align 16
+_bn_add_words:
+L$_bn_add_words_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ mov ebx,DWORD [20+esp]
+ mov esi,DWORD [24+esp]
+ mov edi,DWORD [28+esp]
+ mov ebp,DWORD [32+esp]
+ xor eax,eax
+ and ebp,4294967288
+ jz NEAR L$020aw_finish
+L$021aw_loop:
+ ; Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ ; Round 1
+ mov ecx,DWORD [4+esi]
+ mov edx,DWORD [4+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [4+ebx],ecx
+ ; Round 2
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [8+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [8+ebx],ecx
+ ; Round 3
+ mov ecx,DWORD [12+esi]
+ mov edx,DWORD [12+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [12+ebx],ecx
+ ; Round 4
+ mov ecx,DWORD [16+esi]
+ mov edx,DWORD [16+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [16+ebx],ecx
+ ; Round 5
+ mov ecx,DWORD [20+esi]
+ mov edx,DWORD [20+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [20+ebx],ecx
+ ; Round 6
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [24+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+ ; Round 7
+ mov ecx,DWORD [28+esi]
+ mov edx,DWORD [28+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [28+ebx],ecx
+ ;
+ add esi,32
+ add edi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$021aw_loop
+L$020aw_finish:
+ mov ebp,DWORD [32+esp]
+ and ebp,7
+ jz NEAR L$022aw_end
+ ; Tail Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 1
+ mov ecx,DWORD [4+esi]
+ mov edx,DWORD [4+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [4+ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 2
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [8+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [8+ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 3
+ mov ecx,DWORD [12+esi]
+ mov edx,DWORD [12+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [12+ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 4
+ mov ecx,DWORD [16+esi]
+ mov edx,DWORD [16+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [16+ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 5
+ mov ecx,DWORD [20+esi]
+ mov edx,DWORD [20+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [20+ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 6
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [24+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+L$022aw_end:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _bn_sub_words
+align 16
+_bn_sub_words:
+L$_bn_sub_words_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ mov ebx,DWORD [20+esp]
+ mov esi,DWORD [24+esp]
+ mov edi,DWORD [28+esp]
+ mov ebp,DWORD [32+esp]
+ xor eax,eax
+ and ebp,4294967288
+ jz NEAR L$023aw_finish
+L$024aw_loop:
+ ; Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ ; Round 1
+ mov ecx,DWORD [4+esi]
+ mov edx,DWORD [4+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [4+ebx],ecx
+ ; Round 2
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [8+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [8+ebx],ecx
+ ; Round 3
+ mov ecx,DWORD [12+esi]
+ mov edx,DWORD [12+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [12+ebx],ecx
+ ; Round 4
+ mov ecx,DWORD [16+esi]
+ mov edx,DWORD [16+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [16+ebx],ecx
+ ; Round 5
+ mov ecx,DWORD [20+esi]
+ mov edx,DWORD [20+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [20+ebx],ecx
+ ; Round 6
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [24+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+ ; Round 7
+ mov ecx,DWORD [28+esi]
+ mov edx,DWORD [28+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [28+ebx],ecx
+ ;
+ add esi,32
+ add edi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$024aw_loop
+L$023aw_finish:
+ mov ebp,DWORD [32+esp]
+ and ebp,7
+ jz NEAR L$025aw_end
+ ; Tail Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 1
+ mov ecx,DWORD [4+esi]
+ mov edx,DWORD [4+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [4+ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 2
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [8+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [8+ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 3
+ mov ecx,DWORD [12+esi]
+ mov edx,DWORD [12+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [12+ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 4
+ mov ecx,DWORD [16+esi]
+ mov edx,DWORD [16+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [16+ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 5
+ mov ecx,DWORD [20+esi]
+ mov edx,DWORD [20+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [20+ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 6
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [24+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+L$025aw_end:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _bn_sub_part_words
+align 16
+_bn_sub_part_words:
+L$_bn_sub_part_words_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ mov ebx,DWORD [20+esp]
+ mov esi,DWORD [24+esp]
+ mov edi,DWORD [28+esp]
+ mov ebp,DWORD [32+esp]
+ xor eax,eax
+ and ebp,4294967288
+ jz NEAR L$026aw_finish
+L$027aw_loop:
+ ; Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ ; Round 1
+ mov ecx,DWORD [4+esi]
+ mov edx,DWORD [4+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [4+ebx],ecx
+ ; Round 2
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [8+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [8+ebx],ecx
+ ; Round 3
+ mov ecx,DWORD [12+esi]
+ mov edx,DWORD [12+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [12+ebx],ecx
+ ; Round 4
+ mov ecx,DWORD [16+esi]
+ mov edx,DWORD [16+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [16+ebx],ecx
+ ; Round 5
+ mov ecx,DWORD [20+esi]
+ mov edx,DWORD [20+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [20+ebx],ecx
+ ; Round 6
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [24+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+ ; Round 7
+ mov ecx,DWORD [28+esi]
+ mov edx,DWORD [28+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [28+ebx],ecx
+ ;
+ add esi,32
+ add edi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$027aw_loop
+L$026aw_finish:
+ mov ebp,DWORD [32+esp]
+ and ebp,7
+ jz NEAR L$028aw_end
+ ; Tail Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 1
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 2
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 3
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 4
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 5
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 6
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+L$028aw_end:
+ cmp DWORD [36+esp],0
+ je NEAR L$029pw_end
+ mov ebp,DWORD [36+esp]
+ cmp ebp,0
+ je NEAR L$029pw_end
+ jge NEAR L$030pw_pos
+ ; pw_neg
+ mov edx,0
+ sub edx,ebp
+ mov ebp,edx
+ and ebp,4294967288
+ jz NEAR L$031pw_neg_finish
+L$032pw_neg_loop:
+ ; dl<0 Round 0
+ mov ecx,0
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ ; dl<0 Round 1
+ mov ecx,0
+ mov edx,DWORD [4+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [4+ebx],ecx
+ ; dl<0 Round 2
+ mov ecx,0
+ mov edx,DWORD [8+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [8+ebx],ecx
+ ; dl<0 Round 3
+ mov ecx,0
+ mov edx,DWORD [12+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [12+ebx],ecx
+ ; dl<0 Round 4
+ mov ecx,0
+ mov edx,DWORD [16+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [16+ebx],ecx
+ ; dl<0 Round 5
+ mov ecx,0
+ mov edx,DWORD [20+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [20+ebx],ecx
+ ; dl<0 Round 6
+ mov ecx,0
+ mov edx,DWORD [24+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+ ; dl<0 Round 7
+ mov ecx,0
+ mov edx,DWORD [28+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [28+ebx],ecx
+ ;
+ add edi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$032pw_neg_loop
+L$031pw_neg_finish:
+ mov edx,DWORD [36+esp]
+ mov ebp,0
+ sub ebp,edx
+ and ebp,7
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 0
+ mov ecx,0
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 1
+ mov ecx,0
+ mov edx,DWORD [4+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [4+ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 2
+ mov ecx,0
+ mov edx,DWORD [8+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [8+ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 3
+ mov ecx,0
+ mov edx,DWORD [12+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [12+ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 4
+ mov ecx,0
+ mov edx,DWORD [16+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [16+ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 5
+ mov ecx,0
+ mov edx,DWORD [20+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [20+ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 6
+ mov ecx,0
+ mov edx,DWORD [24+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+ jmp NEAR L$029pw_end
+L$030pw_pos:
+ and ebp,4294967288
+ jz NEAR L$033pw_pos_finish
+L$034pw_pos_loop:
+ ; dl>0 Round 0
+ mov ecx,DWORD [esi]
+ sub ecx,eax
+ mov DWORD [ebx],ecx
+ jnc NEAR L$035pw_nc0
+ ; dl>0 Round 1
+ mov ecx,DWORD [4+esi]
+ sub ecx,eax
+ mov DWORD [4+ebx],ecx
+ jnc NEAR L$036pw_nc1
+ ; dl>0 Round 2
+ mov ecx,DWORD [8+esi]
+ sub ecx,eax
+ mov DWORD [8+ebx],ecx
+ jnc NEAR L$037pw_nc2
+ ; dl>0 Round 3
+ mov ecx,DWORD [12+esi]
+ sub ecx,eax
+ mov DWORD [12+ebx],ecx
+ jnc NEAR L$038pw_nc3
+ ; dl>0 Round 4
+ mov ecx,DWORD [16+esi]
+ sub ecx,eax
+ mov DWORD [16+ebx],ecx
+ jnc NEAR L$039pw_nc4
+ ; dl>0 Round 5
+ mov ecx,DWORD [20+esi]
+ sub ecx,eax
+ mov DWORD [20+ebx],ecx
+ jnc NEAR L$040pw_nc5
+ ; dl>0 Round 6
+ mov ecx,DWORD [24+esi]
+ sub ecx,eax
+ mov DWORD [24+ebx],ecx
+ jnc NEAR L$041pw_nc6
+ ; dl>0 Round 7
+ mov ecx,DWORD [28+esi]
+ sub ecx,eax
+ mov DWORD [28+ebx],ecx
+ jnc NEAR L$042pw_nc7
+ ;
+ add esi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$034pw_pos_loop
+L$033pw_pos_finish:
+ mov ebp,DWORD [36+esp]
+ and ebp,7
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 0
+ mov ecx,DWORD [esi]
+ sub ecx,eax
+ mov DWORD [ebx],ecx
+ jnc NEAR L$043pw_tail_nc0
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 1
+ mov ecx,DWORD [4+esi]
+ sub ecx,eax
+ mov DWORD [4+ebx],ecx
+ jnc NEAR L$044pw_tail_nc1
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 2
+ mov ecx,DWORD [8+esi]
+ sub ecx,eax
+ mov DWORD [8+ebx],ecx
+ jnc NEAR L$045pw_tail_nc2
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 3
+ mov ecx,DWORD [12+esi]
+ sub ecx,eax
+ mov DWORD [12+ebx],ecx
+ jnc NEAR L$046pw_tail_nc3
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 4
+ mov ecx,DWORD [16+esi]
+ sub ecx,eax
+ mov DWORD [16+ebx],ecx
+ jnc NEAR L$047pw_tail_nc4
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 5
+ mov ecx,DWORD [20+esi]
+ sub ecx,eax
+ mov DWORD [20+ebx],ecx
+ jnc NEAR L$048pw_tail_nc5
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 6
+ mov ecx,DWORD [24+esi]
+ sub ecx,eax
+ mov DWORD [24+ebx],ecx
+ jnc NEAR L$049pw_tail_nc6
+ mov eax,1
+ jmp NEAR L$029pw_end
+L$050pw_nc_loop:
+ mov ecx,DWORD [esi]
+ mov DWORD [ebx],ecx
+L$035pw_nc0:
+ mov ecx,DWORD [4+esi]
+ mov DWORD [4+ebx],ecx
+L$036pw_nc1:
+ mov ecx,DWORD [8+esi]
+ mov DWORD [8+ebx],ecx
+L$037pw_nc2:
+ mov ecx,DWORD [12+esi]
+ mov DWORD [12+ebx],ecx
+L$038pw_nc3:
+ mov ecx,DWORD [16+esi]
+ mov DWORD [16+ebx],ecx
+L$039pw_nc4:
+ mov ecx,DWORD [20+esi]
+ mov DWORD [20+ebx],ecx
+L$040pw_nc5:
+ mov ecx,DWORD [24+esi]
+ mov DWORD [24+ebx],ecx
+L$041pw_nc6:
+ mov ecx,DWORD [28+esi]
+ mov DWORD [28+ebx],ecx
+L$042pw_nc7:
+ ;
+ add esi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$050pw_nc_loop
+ mov ebp,DWORD [36+esp]
+ and ebp,7
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [esi]
+ mov DWORD [ebx],ecx
+L$043pw_tail_nc0:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [4+esi]
+ mov DWORD [4+ebx],ecx
+L$044pw_tail_nc1:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [8+esi]
+ mov DWORD [8+ebx],ecx
+L$045pw_tail_nc2:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [12+esi]
+ mov DWORD [12+ebx],ecx
+L$046pw_tail_nc3:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [16+esi]
+ mov DWORD [16+ebx],ecx
+L$047pw_tail_nc4:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [20+esi]
+ mov DWORD [20+ebx],ecx
+L$048pw_tail_nc5:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [24+esi]
+ mov DWORD [24+ebx],ecx
+L$049pw_tail_nc6:
+L$051pw_nc_end:
+ mov eax,0
+L$029pw_end:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+segment .bss
+common _OPENSSL_ia32cap_P 16
diff --git a/win-x86/crypto/bn/co-586.asm b/win-x86/crypto/bn/co-586.asm
new file mode 100644
index 0000000..5780dc8
--- /dev/null
+++ b/win-x86/crypto/bn/co-586.asm
@@ -0,0 +1,1260 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+%ifdef __YASM_VERSION_ID__
+%if __YASM_VERSION_ID__ < 01010000h
+%error yasm version 1.1.0 or later needed.
+%endif
+; Yasm automatically includes .00 and complains about redefining it.
+; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
+%else
+$@feat.00 equ 1
+%endif
+section .text code align=64
+%else
+section .text code
+%endif
+global _bn_mul_comba8
+align 16
+_bn_mul_comba8:
+L$_bn_mul_comba8_begin:
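+	; Comba (column-wise) multiplication of two 8-word operands: each output word r[N] is
+	; accumulated across the ebx/ecx/ebp registers before being stored, as the "saved r[N]"
+	; markers below indicate.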
+ push esi
+ mov esi,DWORD [12+esp]
+ push edi
+ mov edi,DWORD [20+esp]
+ push ebp
+ push ebx
+ xor ebx,ebx
+ mov eax,DWORD [esi]
+ xor ecx,ecx
+ mov edx,DWORD [edi]
+ ; ################## Calculate word 0
+ xor ebp,ebp
+ ; mul a[0]*b[0]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esp]
+ adc ecx,edx
+ mov edx,DWORD [edi]
+ adc ebp,0
+ mov DWORD [eax],ebx
+ mov eax,DWORD [4+esi]
+ ; saved r[0]
+ ; ################## Calculate word 1
+ xor ebx,ebx
+ ; mul a[1]*b[0]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [esi]
+ adc ebp,edx
+ mov edx,DWORD [4+edi]
+ adc ebx,0
+ ; mul a[0]*b[1]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esp]
+ adc ebp,edx
+ mov edx,DWORD [edi]
+ adc ebx,0
+ mov DWORD [4+eax],ecx
+ mov eax,DWORD [8+esi]
+ ; saved r[1]
+ ; ################## Calculate word 2
+ xor ecx,ecx
+ ; mul a[2]*b[0]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [4+esi]
+ adc ebx,edx
+ mov edx,DWORD [4+edi]
+ adc ecx,0
+ ; mul a[1]*b[1]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [esi]
+ adc ebx,edx
+ mov edx,DWORD [8+edi]
+ adc ecx,0
+ ; mul a[0]*b[2]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esp]
+ adc ebx,edx
+ mov edx,DWORD [edi]
+ adc ecx,0
+ mov DWORD [8+eax],ebp
+ mov eax,DWORD [12+esi]
+ ; saved r[2]
+ ; ################## Calculate word 3
+ xor ebp,ebp
+ ; mul a[3]*b[0]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [8+esi]
+ adc ecx,edx
+ mov edx,DWORD [4+edi]
+ adc ebp,0
+ ; mul a[2]*b[1]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [4+esi]
+ adc ecx,edx
+ mov edx,DWORD [8+edi]
+ adc ebp,0
+ ; mul a[1]*b[2]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [esi]
+ adc ecx,edx
+ mov edx,DWORD [12+edi]
+ adc ebp,0
+ ; mul a[0]*b[3]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esp]
+ adc ecx,edx
+ mov edx,DWORD [edi]
+ adc ebp,0
+ mov DWORD [12+eax],ebx
+ mov eax,DWORD [16+esi]
+ ; saved r[3]
+ ; ################## Calculate word 4
+ xor ebx,ebx
+ ; mul a[4]*b[0]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [12+esi]
+ adc ebp,edx
+ mov edx,DWORD [4+edi]
+ adc ebx,0
+ ; mul a[3]*b[1]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [8+esi]
+ adc ebp,edx
+ mov edx,DWORD [8+edi]
+ adc ebx,0
+ ; mul a[2]*b[2]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [4+esi]
+ adc ebp,edx
+ mov edx,DWORD [12+edi]
+ adc ebx,0
+ ; mul a[1]*b[3]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [esi]
+ adc ebp,edx
+ mov edx,DWORD [16+edi]
+ adc ebx,0
+ ; mul a[0]*b[4]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esp]
+ adc ebp,edx
+ mov edx,DWORD [edi]
+ adc ebx,0
+ mov DWORD [16+eax],ecx
+ mov eax,DWORD [20+esi]
+ ; saved r[4]
+ ; ################## Calculate word 5
+ xor ecx,ecx
+ ; mul a[5]*b[0]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [16+esi]
+ adc ebx,edx
+ mov edx,DWORD [4+edi]
+ adc ecx,0
+ ; mul a[4]*b[1]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [12+esi]
+ adc ebx,edx
+ mov edx,DWORD [8+edi]
+ adc ecx,0
+ ; mul a[3]*b[2]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [8+esi]
+ adc ebx,edx
+ mov edx,DWORD [12+edi]
+ adc ecx,0
+ ; mul a[2]*b[3]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [4+esi]
+ adc ebx,edx
+ mov edx,DWORD [16+edi]
+ adc ecx,0
+ ; mul a[1]*b[4]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [esi]
+ adc ebx,edx
+ mov edx,DWORD [20+edi]
+ adc ecx,0
+ ; mul a[0]*b[5]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esp]
+ adc ebx,edx
+ mov edx,DWORD [edi]
+ adc ecx,0
+ mov DWORD [20+eax],ebp
+ mov eax,DWORD [24+esi]
+ ; saved r[5]
+ ; ################## Calculate word 6
+ xor ebp,ebp
+ ; mul a[6]*b[0]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esi]
+ adc ecx,edx
+ mov edx,DWORD [4+edi]
+ adc ebp,0
+ ; mul a[5]*b[1]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [16+esi]
+ adc ecx,edx
+ mov edx,DWORD [8+edi]
+ adc ebp,0
+ ; mul a[4]*b[2]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [12+esi]
+ adc ecx,edx
+ mov edx,DWORD [12+edi]
+ adc ebp,0
+ ; mul a[3]*b[3]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [8+esi]
+ adc ecx,edx
+ mov edx,DWORD [16+edi]
+ adc ebp,0
+ ; mul a[2]*b[4]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [4+esi]
+ adc ecx,edx
+ mov edx,DWORD [20+edi]
+ adc ebp,0
+ ; mul a[1]*b[5]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [esi]
+ adc ecx,edx
+ mov edx,DWORD [24+edi]
+ adc ebp,0
+ ; mul a[0]*b[6]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esp]
+ adc ecx,edx
+ mov edx,DWORD [edi]
+ adc ebp,0
+ mov DWORD [24+eax],ebx
+ mov eax,DWORD [28+esi]
+ ; saved r[6]
+ ; ################## Calculate word 7
+ xor ebx,ebx
+ ; mul a[7]*b[0]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [24+esi]
+ adc ebp,edx
+ mov edx,DWORD [4+edi]
+ adc ebx,0
+ ; mul a[6]*b[1]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esi]
+ adc ebp,edx
+ mov edx,DWORD [8+edi]
+ adc ebx,0
+ ; mul a[5]*b[2]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [16+esi]
+ adc ebp,edx
+ mov edx,DWORD [12+edi]
+ adc ebx,0
+ ; mul a[4]*b[3]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [12+esi]
+ adc ebp,edx
+ mov edx,DWORD [16+edi]
+ adc ebx,0
+ ; mul a[3]*b[4]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [8+esi]
+ adc ebp,edx
+ mov edx,DWORD [20+edi]
+ adc ebx,0
+ ; mul a[2]*b[5]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [4+esi]
+ adc ebp,edx
+ mov edx,DWORD [24+edi]
+ adc ebx,0
+ ; mul a[1]*b[6]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [esi]
+ adc ebp,edx
+ mov edx,DWORD [28+edi]
+ adc ebx,0
+ ; mul a[0]*b[7]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esp]
+ adc ebp,edx
+ mov edx,DWORD [4+edi]
+ adc ebx,0
+ mov DWORD [28+eax],ecx
+ mov eax,DWORD [28+esi]
+ ; saved r[7]
+ ; ################## Calculate word 8
+ xor ecx,ecx
+ ; mul a[7]*b[1]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [24+esi]
+ adc ebx,edx
+ mov edx,DWORD [8+edi]
+ adc ecx,0
+ ; mul a[6]*b[2]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esi]
+ adc ebx,edx
+ mov edx,DWORD [12+edi]
+ adc ecx,0
+ ; mul a[5]*b[3]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [16+esi]
+ adc ebx,edx
+ mov edx,DWORD [16+edi]
+ adc ecx,0
+ ; mul a[4]*b[4]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [12+esi]
+ adc ebx,edx
+ mov edx,DWORD [20+edi]
+ adc ecx,0
+ ; mul a[3]*b[5]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [8+esi]
+ adc ebx,edx
+ mov edx,DWORD [24+edi]
+ adc ecx,0
+ ; mul a[2]*b[6]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [4+esi]
+ adc ebx,edx
+ mov edx,DWORD [28+edi]
+ adc ecx,0
+ ; mul a[1]*b[7]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esp]
+ adc ebx,edx
+ mov edx,DWORD [8+edi]
+ adc ecx,0
+ mov DWORD [32+eax],ebp
+ mov eax,DWORD [28+esi]
+ ; saved r[8]
+ ; ################## Calculate word 9
+ xor ebp,ebp
+ ; mul a[7]*b[2]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [24+esi]
+ adc ecx,edx
+ mov edx,DWORD [12+edi]
+ adc ebp,0
+ ; mul a[6]*b[3]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esi]
+ adc ecx,edx
+ mov edx,DWORD [16+edi]
+ adc ebp,0
+ ; mul a[5]*b[4]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [16+esi]
+ adc ecx,edx
+ mov edx,DWORD [20+edi]
+ adc ebp,0
+ ; mul a[4]*b[5]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [12+esi]
+ adc ecx,edx
+ mov edx,DWORD [24+edi]
+ adc ebp,0
+ ; mul a[3]*b[6]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [8+esi]
+ adc ecx,edx
+ mov edx,DWORD [28+edi]
+ adc ebp,0
+ ; mul a[2]*b[7]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esp]
+ adc ecx,edx
+ mov edx,DWORD [12+edi]
+ adc ebp,0
+ mov DWORD [36+eax],ebx
+ mov eax,DWORD [28+esi]
+ ; saved r[9]
+ ; ################## Calculate word 10
+ xor ebx,ebx
+ ; mul a[7]*b[3]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [24+esi]
+ adc ebp,edx
+ mov edx,DWORD [16+edi]
+ adc ebx,0
+ ; mul a[6]*b[4]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esi]
+ adc ebp,edx
+ mov edx,DWORD [20+edi]
+ adc ebx,0
+ ; mul a[5]*b[5]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [16+esi]
+ adc ebp,edx
+ mov edx,DWORD [24+edi]
+ adc ebx,0
+ ; mul a[4]*b[6]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [12+esi]
+ adc ebp,edx
+ mov edx,DWORD [28+edi]
+ adc ebx,0
+ ; mul a[3]*b[7]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esp]
+ adc ebp,edx
+ mov edx,DWORD [16+edi]
+ adc ebx,0
+ mov DWORD [40+eax],ecx
+ mov eax,DWORD [28+esi]
+ ; saved r[10]
+ ; ################## Calculate word 11
+ xor ecx,ecx
+ ; mul a[7]*b[4]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [24+esi]
+ adc ebx,edx
+ mov edx,DWORD [20+edi]
+ adc ecx,0
+ ; mul a[6]*b[5]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esi]
+ adc ebx,edx
+ mov edx,DWORD [24+edi]
+ adc ecx,0
+ ; mul a[5]*b[6]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [16+esi]
+ adc ebx,edx
+ mov edx,DWORD [28+edi]
+ adc ecx,0
+ ; mul a[4]*b[7]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esp]
+ adc ebx,edx
+ mov edx,DWORD [20+edi]
+ adc ecx,0
+ mov DWORD [44+eax],ebp
+ mov eax,DWORD [28+esi]
+ ; saved r[11]
+ ; ################## Calculate word 12
+ xor ebp,ebp
+ ; mul a[7]*b[5]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [24+esi]
+ adc ecx,edx
+ mov edx,DWORD [24+edi]
+ adc ebp,0
+ ; mul a[6]*b[6]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esi]
+ adc ecx,edx
+ mov edx,DWORD [28+edi]
+ adc ebp,0
+ ; mul a[5]*b[7]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esp]
+ adc ecx,edx
+ mov edx,DWORD [24+edi]
+ adc ebp,0
+ mov DWORD [48+eax],ebx
+ mov eax,DWORD [28+esi]
+ ; saved r[12]
+ ; ################## Calculate word 13
+ xor ebx,ebx
+ ; mul a[7]*b[6]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [24+esi]
+ adc ebp,edx
+ mov edx,DWORD [28+edi]
+ adc ebx,0
+ ; mul a[6]*b[7]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esp]
+ adc ebp,edx
+ mov edx,DWORD [28+edi]
+ adc ebx,0
+ mov DWORD [52+eax],ecx
+ mov eax,DWORD [28+esi]
+ ; saved r[13]
+ ; ################## Calculate word 14
+ xor ecx,ecx
+ ; mul a[7]*b[7]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esp]
+ adc ebx,edx
+ adc ecx,0
+ mov DWORD [56+eax],ebp
+ ; saved r[14]
+ ; save r[15]
+ mov DWORD [60+eax],ebx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
+global _bn_mul_comba4
+align 16
+_bn_mul_comba4:
+L$_bn_mul_comba4_begin:
+ push esi
+ mov esi,DWORD [12+esp]
+ push edi
+ mov edi,DWORD [20+esp]
+ push ebp
+ push ebx
+ xor ebx,ebx
+ mov eax,DWORD [esi]
+ xor ecx,ecx
+ mov edx,DWORD [edi]
+ ; ################## Calculate word 0
+ xor ebp,ebp
+ ; mul a[0]*b[0]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esp]
+ adc ecx,edx
+ mov edx,DWORD [edi]
+ adc ebp,0
+ mov DWORD [eax],ebx
+ mov eax,DWORD [4+esi]
+ ; saved r[0]
+ ; ################## Calculate word 1
+ xor ebx,ebx
+ ; mul a[1]*b[0]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [esi]
+ adc ebp,edx
+ mov edx,DWORD [4+edi]
+ adc ebx,0
+ ; mul a[0]*b[1]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esp]
+ adc ebp,edx
+ mov edx,DWORD [edi]
+ adc ebx,0
+ mov DWORD [4+eax],ecx
+ mov eax,DWORD [8+esi]
+ ; saved r[1]
+ ; ################## Calculate word 2
+ xor ecx,ecx
+ ; mul a[2]*b[0]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [4+esi]
+ adc ebx,edx
+ mov edx,DWORD [4+edi]
+ adc ecx,0
+ ; mul a[1]*b[1]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [esi]
+ adc ebx,edx
+ mov edx,DWORD [8+edi]
+ adc ecx,0
+ ; mul a[0]*b[2]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esp]
+ adc ebx,edx
+ mov edx,DWORD [edi]
+ adc ecx,0
+ mov DWORD [8+eax],ebp
+ mov eax,DWORD [12+esi]
+ ; saved r[2]
+ ; ################## Calculate word 3
+ xor ebp,ebp
+ ; mul a[3]*b[0]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [8+esi]
+ adc ecx,edx
+ mov edx,DWORD [4+edi]
+ adc ebp,0
+ ; mul a[2]*b[1]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [4+esi]
+ adc ecx,edx
+ mov edx,DWORD [8+edi]
+ adc ebp,0
+ ; mul a[1]*b[2]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [esi]
+ adc ecx,edx
+ mov edx,DWORD [12+edi]
+ adc ebp,0
+ ; mul a[0]*b[3]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esp]
+ adc ecx,edx
+ mov edx,DWORD [4+edi]
+ adc ebp,0
+ mov DWORD [12+eax],ebx
+ mov eax,DWORD [12+esi]
+ ; saved r[3]
+ ; ################## Calculate word 4
+ xor ebx,ebx
+ ; mul a[3]*b[1]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [8+esi]
+ adc ebp,edx
+ mov edx,DWORD [8+edi]
+ adc ebx,0
+ ; mul a[2]*b[2]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [4+esi]
+ adc ebp,edx
+ mov edx,DWORD [12+edi]
+ adc ebx,0
+ ; mul a[1]*b[3]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esp]
+ adc ebp,edx
+ mov edx,DWORD [8+edi]
+ adc ebx,0
+ mov DWORD [16+eax],ecx
+ mov eax,DWORD [12+esi]
+ ; saved r[4]
+ ; ################## Calculate word 5
+ xor ecx,ecx
+ ; mul a[3]*b[2]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [8+esi]
+ adc ebx,edx
+ mov edx,DWORD [12+edi]
+ adc ecx,0
+ ; mul a[2]*b[3]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esp]
+ adc ebx,edx
+ mov edx,DWORD [12+edi]
+ adc ecx,0
+ mov DWORD [20+eax],ebp
+ mov eax,DWORD [12+esi]
+ ; saved r[5]
+ ; ################## Calculate word 6
+ xor ebp,ebp
+ ; mul a[3]*b[3]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esp]
+ adc ecx,edx
+ adc ebp,0
+ mov DWORD [24+eax],ebx
+ ; saved r[6]
+ ; save r[7]
+ mov DWORD [28+eax],ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
+global _bn_sqr_comba8
+align 16
+_bn_sqr_comba8:
+L$_bn_sqr_comba8_begin:
+ push esi
+ push edi
+ push ebp
+ push ebx
+ mov edi,DWORD [20+esp]
+ mov esi,DWORD [24+esp]
+ xor ebx,ebx
+ xor ecx,ecx
+ mov eax,DWORD [esi]
+ ; ############### Calculate word 0
+ xor ebp,ebp
+ ; sqr a[0]*a[0]
+ mul eax
+ add ebx,eax
+ adc ecx,edx
+ mov edx,DWORD [esi]
+ adc ebp,0
+ mov DWORD [edi],ebx
+ mov eax,DWORD [4+esi]
+ ; saved r[0]
+ ; ############### Calculate word 1
+ xor ebx,ebx
+ ; sqr a[1]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [8+esi]
+ adc ebx,0
+ mov DWORD [4+edi],ecx
+ mov edx,DWORD [esi]
+ ; saved r[1]
+ ; ############### Calculate word 2
+ xor ecx,ecx
+ ; sqr a[2]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [4+esi]
+ adc ecx,0
+ ; sqr a[1]*a[1]
+ mul eax
+ add ebp,eax
+ adc ebx,edx
+ mov edx,DWORD [esi]
+ adc ecx,0
+ mov DWORD [8+edi],ebp
+ mov eax,DWORD [12+esi]
+ ; saved r[2]
+ ; ############### Calculate word 3
+ xor ebp,ebp
+ ; sqr a[3]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [8+esi]
+ adc ebp,0
+ mov edx,DWORD [4+esi]
+ ; sqr a[2]*a[1]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [16+esi]
+ adc ebp,0
+ mov DWORD [12+edi],ebx
+ mov edx,DWORD [esi]
+ ; saved r[3]
+ ; ############### Calculate word 4
+ xor ebx,ebx
+ ; sqr a[4]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [12+esi]
+ adc ebx,0
+ mov edx,DWORD [4+esi]
+ ; sqr a[3]*a[1]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [8+esi]
+ adc ebx,0
+ ; sqr a[2]*a[2]
+ mul eax
+ add ecx,eax
+ adc ebp,edx
+ mov edx,DWORD [esi]
+ adc ebx,0
+ mov DWORD [16+edi],ecx
+ mov eax,DWORD [20+esi]
+ ; saved r[4]
+ ; ############### Calculate word 5
+ xor ecx,ecx
+ ; sqr a[5]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [16+esi]
+ adc ecx,0
+ mov edx,DWORD [4+esi]
+ ; sqr a[4]*a[1]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [12+esi]
+ adc ecx,0
+ mov edx,DWORD [8+esi]
+ ; sqr a[3]*a[2]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [24+esi]
+ adc ecx,0
+ mov DWORD [20+edi],ebp
+ mov edx,DWORD [esi]
+ ; saved r[5]
+ ; ############### Calculate word 6
+ xor ebp,ebp
+ ; sqr a[6]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [20+esi]
+ adc ebp,0
+ mov edx,DWORD [4+esi]
+ ; sqr a[5]*a[1]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [16+esi]
+ adc ebp,0
+ mov edx,DWORD [8+esi]
+ ; sqr a[4]*a[2]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [12+esi]
+ adc ebp,0
+ ; sqr a[3]*a[3]
+ mul eax
+ add ebx,eax
+ adc ecx,edx
+ mov edx,DWORD [esi]
+ adc ebp,0
+ mov DWORD [24+edi],ebx
+ mov eax,DWORD [28+esi]
+ ; saved r[6]
+ ; ############### Calculate word 7
+ xor ebx,ebx
+ ; sqr a[7]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [24+esi]
+ adc ebx,0
+ mov edx,DWORD [4+esi]
+ ; sqr a[6]*a[1]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [20+esi]
+ adc ebx,0
+ mov edx,DWORD [8+esi]
+ ; sqr a[5]*a[2]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [16+esi]
+ adc ebx,0
+ mov edx,DWORD [12+esi]
+ ; sqr a[4]*a[3]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [28+esi]
+ adc ebx,0
+ mov DWORD [28+edi],ecx
+ mov edx,DWORD [4+esi]
+ ; saved r[7]
+ ; ############### Calculate word 8
+ xor ecx,ecx
+ ; sqr a[7]*a[1]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [24+esi]
+ adc ecx,0
+ mov edx,DWORD [8+esi]
+ ; sqr a[6]*a[2]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [20+esi]
+ adc ecx,0
+ mov edx,DWORD [12+esi]
+ ; sqr a[5]*a[3]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [16+esi]
+ adc ecx,0
+ ; sqr a[4]*a[4]
+ mul eax
+ add ebp,eax
+ adc ebx,edx
+ mov edx,DWORD [8+esi]
+ adc ecx,0
+ mov DWORD [32+edi],ebp
+ mov eax,DWORD [28+esi]
+ ; saved r[8]
+ ; ############### Calculate word 9
+ xor ebp,ebp
+ ; sqr a[7]*a[2]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [24+esi]
+ adc ebp,0
+ mov edx,DWORD [12+esi]
+ ; sqr a[6]*a[3]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [20+esi]
+ adc ebp,0
+ mov edx,DWORD [16+esi]
+ ; sqr a[5]*a[4]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [28+esi]
+ adc ebp,0
+ mov DWORD [36+edi],ebx
+ mov edx,DWORD [12+esi]
+ ; saved r[9]
+ ; ############### Calculate word 10
+ xor ebx,ebx
+ ; sqr a[7]*a[3]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [24+esi]
+ adc ebx,0
+ mov edx,DWORD [16+esi]
+ ; sqr a[6]*a[4]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [20+esi]
+ adc ebx,0
+ ; sqr a[5]*a[5]
+ mul eax
+ add ecx,eax
+ adc ebp,edx
+ mov edx,DWORD [16+esi]
+ adc ebx,0
+ mov DWORD [40+edi],ecx
+ mov eax,DWORD [28+esi]
+ ; saved r[10]
+ ; ############### Calculate word 11
+ xor ecx,ecx
+ ; sqr a[7]*a[4]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [24+esi]
+ adc ecx,0
+ mov edx,DWORD [20+esi]
+ ; sqr a[6]*a[5]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [28+esi]
+ adc ecx,0
+ mov DWORD [44+edi],ebp
+ mov edx,DWORD [20+esi]
+ ; saved r[11]
+ ; ############### Calculate word 12
+ xor ebp,ebp
+ ; sqr a[7]*a[5]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [24+esi]
+ adc ebp,0
+ ; sqr a[6]*a[6]
+ mul eax
+ add ebx,eax
+ adc ecx,edx
+ mov edx,DWORD [24+esi]
+ adc ebp,0
+ mov DWORD [48+edi],ebx
+ mov eax,DWORD [28+esi]
+ ; saved r[12]
+ ; ############### Calculate word 13
+ xor ebx,ebx
+ ; sqr a[7]*a[6]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [28+esi]
+ adc ebx,0
+ mov DWORD [52+edi],ecx
+ ; saved r[13]
+ ; ############### Calculate word 14
+ xor ecx,ecx
+ ; sqr a[7]*a[7]
+ mul eax
+ add ebp,eax
+ adc ebx,edx
+ adc ecx,0
+ mov DWORD [56+edi],ebp
+ ; saved r[14]
+ mov DWORD [60+edi],ebx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
+global _bn_sqr_comba4
+align 16
+_bn_sqr_comba4:
+L$_bn_sqr_comba4_begin:
+ push esi
+ push edi
+ push ebp
+ push ebx
+ mov edi,DWORD [20+esp]
+ mov esi,DWORD [24+esp]
+ xor ebx,ebx
+ xor ecx,ecx
+ mov eax,DWORD [esi]
+ ; ############### Calculate word 0
+ xor ebp,ebp
+ ; sqr a[0]*a[0]
+ mul eax
+ add ebx,eax
+ adc ecx,edx
+ mov edx,DWORD [esi]
+ adc ebp,0
+ mov DWORD [edi],ebx
+ mov eax,DWORD [4+esi]
+ ; saved r[0]
+ ; ############### Calculate word 1
+ xor ebx,ebx
+ ; sqr a[1]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [8+esi]
+ adc ebx,0
+ mov DWORD [4+edi],ecx
+ mov edx,DWORD [esi]
+ ; saved r[1]
+ ; ############### Calculate word 2
+ xor ecx,ecx
+ ; sqr a[2]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [4+esi]
+ adc ecx,0
+ ; sqr a[1]*a[1]
+ mul eax
+ add ebp,eax
+ adc ebx,edx
+ mov edx,DWORD [esi]
+ adc ecx,0
+ mov DWORD [8+edi],ebp
+ mov eax,DWORD [12+esi]
+ ; saved r[2]
+ ; ############### Calculate word 3
+ xor ebp,ebp
+ ; sqr a[3]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [8+esi]
+ adc ebp,0
+ mov edx,DWORD [4+esi]
+ ; sqr a[2]*a[1]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [12+esi]
+ adc ebp,0
+ mov DWORD [12+edi],ebx
+ mov edx,DWORD [4+esi]
+ ; saved r[3]
+ ; ############### Calculate word 4
+ xor ebx,ebx
+ ; sqr a[3]*a[1]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [8+esi]
+ adc ebx,0
+ ; sqr a[2]*a[2]
+ mul eax
+ add ecx,eax
+ adc ebp,edx
+ mov edx,DWORD [8+esi]
+ adc ebx,0
+ mov DWORD [16+edi],ecx
+ mov eax,DWORD [12+esi]
+ ; saved r[4]
+ ; ############### Calculate word 5
+ xor ecx,ecx
+ ; sqr a[3]*a[2]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [12+esi]
+ adc ecx,0
+ mov DWORD [20+edi],ebp
+ ; saved r[5]
+ ; ############### Calculate word 6
+ xor ebp,ebp
+ ; sqr a[3]*a[3]
+ mul eax
+ add ebx,eax
+ adc ecx,edx
+ adc ebp,0
+ mov DWORD [24+edi],ebx
+ ; saved r[6]
+ mov DWORD [28+edi],ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
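For orientation, the comba routines above (bn_mul_comba8/4 and bn_sqr_comba8/4) compute fixed-size products one output word, or "column", at a time, rotating a three-word carry chain through ebx/ecx/ebp. A rough portable C sketch of the 4-word multiply follows; bn_mul_comba4_ref and mul_add_c are invented names for illustration, and the real BoringSSL prototypes use BN_ULONG rather than uint32_t.

#include <stdint.h>

/* Fold one partial product a*b into the running column accumulator
 * (c0,c1,c2); this is what each "mul a[i]*b[j]" plus add/adc/adc
 * group above does with eax:edx and the rotating ebx/ecx/ebp.       */
static void mul_add_c(uint32_t a, uint32_t b,
                      uint32_t *c0, uint32_t *c1, uint32_t *c2)
{
    uint64_t t = (uint64_t)a * b + *c0;
    *c0 = (uint32_t)t;
    t = (t >> 32) + *c1;
    *c1 = (uint32_t)t;
    *c2 += (uint32_t)(t >> 32);
}

/* r[0..7] = a[0..3] * b[0..3], one column per output word.          */
void bn_mul_comba4_ref(uint32_t r[8],
                       const uint32_t a[4], const uint32_t b[4])
{
    uint32_t c0 = 0, c1 = 0, c2 = 0;
    for (int k = 0; k < 7; k++) {
        for (int i = 0; i < 4; i++) {
            int j = k - i;
            if (j >= 0 && j < 4)
                mul_add_c(a[i], b[j], &c0, &c1, &c2);
        }
        r[k] = c0;                 /* column finished: emit it        */
        c0 = c1; c1 = c2; c2 = 0;  /* rotate the carry chain          */
    }
    r[7] = c0;                     /* final carry word                */
}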
diff --git a/win-x86/crypto/bn/x86-mont.asm b/win-x86/crypto/bn/x86-mont.asm
new file mode 100644
index 0000000..de7b949
--- /dev/null
+++ b/win-x86/crypto/bn/x86-mont.asm
@@ -0,0 +1,469 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+%ifdef __YASM_VERSION_ID__
+%if __YASM_VERSION_ID__ < 01010000h
+%error yasm version 1.1.0 or later needed.
+%endif
+; Yasm automatically includes .00 and complains about redefining it.
+; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
+%else
+$@feat.00 equ 1
+%endif
+section .text code align=64
+%else
+section .text code
+%endif
+;extern _OPENSSL_ia32cap_P
+global _bn_mul_mont
+align 16
+_bn_mul_mont:
+L$_bn_mul_mont_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ xor eax,eax
+ mov edi,DWORD [40+esp]
+ cmp edi,4
+ jl NEAR L$000just_leave
+ lea esi,[20+esp]
+ lea edx,[24+esp]
+ mov ebp,esp
+ add edi,2
+ neg edi
+ lea esp,[edi*4+esp-32]
+ neg edi
+ mov eax,esp
+ sub eax,edx
+ and eax,2047
+ sub esp,eax
+ xor edx,esp
+ and edx,2048
+ xor edx,2048
+ sub esp,edx
+ and esp,-64
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [12+esi]
+ mov esi,DWORD [16+esi]
+ mov esi,DWORD [esi]
+ mov DWORD [4+esp],eax
+ mov DWORD [8+esp],ebx
+ mov DWORD [12+esp],ecx
+ mov DWORD [16+esp],edx
+ mov DWORD [20+esp],esi
+ lea ebx,[edi-3]
+ mov DWORD [24+esp],ebp
+ lea eax,[_OPENSSL_ia32cap_P]
+ bt DWORD [eax],26
+ jnc NEAR L$001non_sse2
+ mov eax,-1
+ movd mm7,eax
+ mov esi,DWORD [8+esp]
+ mov edi,DWORD [12+esp]
+ mov ebp,DWORD [16+esp]
+ xor edx,edx
+ xor ecx,ecx
+ movd mm4,DWORD [edi]
+ movd mm5,DWORD [esi]
+ movd mm3,DWORD [ebp]
+ pmuludq mm5,mm4
+ movq mm2,mm5
+ movq mm0,mm5
+ pand mm0,mm7
+ pmuludq mm5,[20+esp]
+ pmuludq mm3,mm5
+ paddq mm3,mm0
+ movd mm1,DWORD [4+ebp]
+ movd mm0,DWORD [4+esi]
+ psrlq mm2,32
+ psrlq mm3,32
+ inc ecx
+align 16
+L$0021st:
+ pmuludq mm0,mm4
+ pmuludq mm1,mm5
+ paddq mm2,mm0
+ paddq mm3,mm1
+ movq mm0,mm2
+ pand mm0,mm7
+ movd mm1,DWORD [4+ecx*4+ebp]
+ paddq mm3,mm0
+ movd mm0,DWORD [4+ecx*4+esi]
+ psrlq mm2,32
+ movd DWORD [28+ecx*4+esp],mm3
+ psrlq mm3,32
+ lea ecx,[1+ecx]
+ cmp ecx,ebx
+ jl NEAR L$0021st
+ pmuludq mm0,mm4
+ pmuludq mm1,mm5
+ paddq mm2,mm0
+ paddq mm3,mm1
+ movq mm0,mm2
+ pand mm0,mm7
+ paddq mm3,mm0
+ movd DWORD [28+ecx*4+esp],mm3
+ psrlq mm2,32
+ psrlq mm3,32
+ paddq mm3,mm2
+ movq [32+ebx*4+esp],mm3
+ inc edx
+L$003outer:
+ xor ecx,ecx
+ movd mm4,DWORD [edx*4+edi]
+ movd mm5,DWORD [esi]
+ movd mm6,DWORD [32+esp]
+ movd mm3,DWORD [ebp]
+ pmuludq mm5,mm4
+ paddq mm5,mm6
+ movq mm0,mm5
+ movq mm2,mm5
+ pand mm0,mm7
+ pmuludq mm5,[20+esp]
+ pmuludq mm3,mm5
+ paddq mm3,mm0
+ movd mm6,DWORD [36+esp]
+ movd mm1,DWORD [4+ebp]
+ movd mm0,DWORD [4+esi]
+ psrlq mm2,32
+ psrlq mm3,32
+ paddq mm2,mm6
+ inc ecx
+ dec ebx
+L$004inner:
+ pmuludq mm0,mm4
+ pmuludq mm1,mm5
+ paddq mm2,mm0
+ paddq mm3,mm1
+ movq mm0,mm2
+ movd mm6,DWORD [36+ecx*4+esp]
+ pand mm0,mm7
+ movd mm1,DWORD [4+ecx*4+ebp]
+ paddq mm3,mm0
+ movd mm0,DWORD [4+ecx*4+esi]
+ psrlq mm2,32
+ movd DWORD [28+ecx*4+esp],mm3
+ psrlq mm3,32
+ paddq mm2,mm6
+ dec ebx
+ lea ecx,[1+ecx]
+ jnz NEAR L$004inner
+ mov ebx,ecx
+ pmuludq mm0,mm4
+ pmuludq mm1,mm5
+ paddq mm2,mm0
+ paddq mm3,mm1
+ movq mm0,mm2
+ pand mm0,mm7
+ paddq mm3,mm0
+ movd DWORD [28+ecx*4+esp],mm3
+ psrlq mm2,32
+ psrlq mm3,32
+ movd mm6,DWORD [36+ebx*4+esp]
+ paddq mm3,mm2
+ paddq mm3,mm6
+ movq [32+ebx*4+esp],mm3
+ lea edx,[1+edx]
+ cmp edx,ebx
+ jle NEAR L$003outer
+ emms
+ jmp NEAR L$005common_tail
+align 16
+L$001non_sse2:
+ mov esi,DWORD [8+esp]
+ lea ebp,[1+ebx]
+ mov edi,DWORD [12+esp]
+ xor ecx,ecx
+ mov edx,esi
+ and ebp,1
+ sub edx,edi
+ lea eax,[4+ebx*4+edi]
+ or ebp,edx
+ mov edi,DWORD [edi]
+ jz NEAR L$006bn_sqr_mont
+ mov DWORD [28+esp],eax
+ mov eax,DWORD [esi]
+ xor edx,edx
+align 16
+L$007mull:
+ mov ebp,edx
+ mul edi
+ add ebp,eax
+ lea ecx,[1+ecx]
+ adc edx,0
+ mov eax,DWORD [ecx*4+esi]
+ cmp ecx,ebx
+ mov DWORD [28+ecx*4+esp],ebp
+ jl NEAR L$007mull
+ mov ebp,edx
+ mul edi
+ mov edi,DWORD [20+esp]
+ add eax,ebp
+ mov esi,DWORD [16+esp]
+ adc edx,0
+ imul edi,DWORD [32+esp]
+ mov DWORD [32+ebx*4+esp],eax
+ xor ecx,ecx
+ mov DWORD [36+ebx*4+esp],edx
+ mov DWORD [40+ebx*4+esp],ecx
+ mov eax,DWORD [esi]
+ mul edi
+ add eax,DWORD [32+esp]
+ mov eax,DWORD [4+esi]
+ adc edx,0
+ inc ecx
+ jmp NEAR L$0082ndmadd
+align 16
+L$0091stmadd:
+ mov ebp,edx
+ mul edi
+ add ebp,DWORD [32+ecx*4+esp]
+ lea ecx,[1+ecx]
+ adc edx,0
+ add ebp,eax
+ mov eax,DWORD [ecx*4+esi]
+ adc edx,0
+ cmp ecx,ebx
+ mov DWORD [28+ecx*4+esp],ebp
+ jl NEAR L$0091stmadd
+ mov ebp,edx
+ mul edi
+ add eax,DWORD [32+ebx*4+esp]
+ mov edi,DWORD [20+esp]
+ adc edx,0
+ mov esi,DWORD [16+esp]
+ add ebp,eax
+ adc edx,0
+ imul edi,DWORD [32+esp]
+ xor ecx,ecx
+ add edx,DWORD [36+ebx*4+esp]
+ mov DWORD [32+ebx*4+esp],ebp
+ adc ecx,0
+ mov eax,DWORD [esi]
+ mov DWORD [36+ebx*4+esp],edx
+ mov DWORD [40+ebx*4+esp],ecx
+ mul edi
+ add eax,DWORD [32+esp]
+ mov eax,DWORD [4+esi]
+ adc edx,0
+ mov ecx,1
+align 16
+L$0082ndmadd:
+ mov ebp,edx
+ mul edi
+ add ebp,DWORD [32+ecx*4+esp]
+ lea ecx,[1+ecx]
+ adc edx,0
+ add ebp,eax
+ mov eax,DWORD [ecx*4+esi]
+ adc edx,0
+ cmp ecx,ebx
+ mov DWORD [24+ecx*4+esp],ebp
+ jl NEAR L$0082ndmadd
+ mov ebp,edx
+ mul edi
+ add ebp,DWORD [32+ebx*4+esp]
+ adc edx,0
+ add ebp,eax
+ adc edx,0
+ mov DWORD [28+ebx*4+esp],ebp
+ xor eax,eax
+ mov ecx,DWORD [12+esp]
+ add edx,DWORD [36+ebx*4+esp]
+ adc eax,DWORD [40+ebx*4+esp]
+ lea ecx,[4+ecx]
+ mov DWORD [32+ebx*4+esp],edx
+ cmp ecx,DWORD [28+esp]
+ mov DWORD [36+ebx*4+esp],eax
+ je NEAR L$005common_tail
+ mov edi,DWORD [ecx]
+ mov esi,DWORD [8+esp]
+ mov DWORD [12+esp],ecx
+ xor ecx,ecx
+ xor edx,edx
+ mov eax,DWORD [esi]
+ jmp NEAR L$0091stmadd
+align 16
+L$006bn_sqr_mont:
+ mov DWORD [esp],ebx
+ mov DWORD [12+esp],ecx
+ mov eax,edi
+ mul edi
+ mov DWORD [32+esp],eax
+ mov ebx,edx
+ shr edx,1
+ and ebx,1
+ inc ecx
+align 16
+L$010sqr:
+ mov eax,DWORD [ecx*4+esi]
+ mov ebp,edx
+ mul edi
+ add eax,ebp
+ lea ecx,[1+ecx]
+ adc edx,0
+ lea ebp,[eax*2+ebx]
+ shr eax,31
+ cmp ecx,DWORD [esp]
+ mov ebx,eax
+ mov DWORD [28+ecx*4+esp],ebp
+ jl NEAR L$010sqr
+ mov eax,DWORD [ecx*4+esi]
+ mov ebp,edx
+ mul edi
+ add eax,ebp
+ mov edi,DWORD [20+esp]
+ adc edx,0
+ mov esi,DWORD [16+esp]
+ lea ebp,[eax*2+ebx]
+ imul edi,DWORD [32+esp]
+ shr eax,31
+ mov DWORD [32+ecx*4+esp],ebp
+ lea ebp,[edx*2+eax]
+ mov eax,DWORD [esi]
+ shr edx,31
+ mov DWORD [36+ecx*4+esp],ebp
+ mov DWORD [40+ecx*4+esp],edx
+ mul edi
+ add eax,DWORD [32+esp]
+ mov ebx,ecx
+ adc edx,0
+ mov eax,DWORD [4+esi]
+ mov ecx,1
+align 16
+L$0113rdmadd:
+ mov ebp,edx
+ mul edi
+ add ebp,DWORD [32+ecx*4+esp]
+ adc edx,0
+ add ebp,eax
+ mov eax,DWORD [4+ecx*4+esi]
+ adc edx,0
+ mov DWORD [28+ecx*4+esp],ebp
+ mov ebp,edx
+ mul edi
+ add ebp,DWORD [36+ecx*4+esp]
+ lea ecx,[2+ecx]
+ adc edx,0
+ add ebp,eax
+ mov eax,DWORD [ecx*4+esi]
+ adc edx,0
+ cmp ecx,ebx
+ mov DWORD [24+ecx*4+esp],ebp
+ jl NEAR L$0113rdmadd
+ mov ebp,edx
+ mul edi
+ add ebp,DWORD [32+ebx*4+esp]
+ adc edx,0
+ add ebp,eax
+ adc edx,0
+ mov DWORD [28+ebx*4+esp],ebp
+ mov ecx,DWORD [12+esp]
+ xor eax,eax
+ mov esi,DWORD [8+esp]
+ add edx,DWORD [36+ebx*4+esp]
+ adc eax,DWORD [40+ebx*4+esp]
+ mov DWORD [32+ebx*4+esp],edx
+ cmp ecx,ebx
+ mov DWORD [36+ebx*4+esp],eax
+ je NEAR L$005common_tail
+ mov edi,DWORD [4+ecx*4+esi]
+ lea ecx,[1+ecx]
+ mov eax,edi
+ mov DWORD [12+esp],ecx
+ mul edi
+ add eax,DWORD [32+ecx*4+esp]
+ adc edx,0
+ mov DWORD [32+ecx*4+esp],eax
+ xor ebp,ebp
+ cmp ecx,ebx
+ lea ecx,[1+ecx]
+ je NEAR L$012sqrlast
+ mov ebx,edx
+ shr edx,1
+ and ebx,1
+align 16
+L$013sqradd:
+ mov eax,DWORD [ecx*4+esi]
+ mov ebp,edx
+ mul edi
+ add eax,ebp
+ lea ebp,[eax*1+eax]
+ adc edx,0
+ shr eax,31
+ add ebp,DWORD [32+ecx*4+esp]
+ lea ecx,[1+ecx]
+ adc eax,0
+ add ebp,ebx
+ adc eax,0
+ cmp ecx,DWORD [esp]
+ mov DWORD [28+ecx*4+esp],ebp
+ mov ebx,eax
+ jle NEAR L$013sqradd
+ mov ebp,edx
+ add edx,edx
+ shr ebp,31
+ add edx,ebx
+ adc ebp,0
+L$012sqrlast:
+ mov edi,DWORD [20+esp]
+ mov esi,DWORD [16+esp]
+ imul edi,DWORD [32+esp]
+ add edx,DWORD [32+ecx*4+esp]
+ mov eax,DWORD [esi]
+ adc ebp,0
+ mov DWORD [32+ecx*4+esp],edx
+ mov DWORD [36+ecx*4+esp],ebp
+ mul edi
+ add eax,DWORD [32+esp]
+ lea ebx,[ecx-1]
+ adc edx,0
+ mov ecx,1
+ mov eax,DWORD [4+esi]
+ jmp NEAR L$0113rdmadd
+align 16
+L$005common_tail:
+ mov ebp,DWORD [16+esp]
+ mov edi,DWORD [4+esp]
+ lea esi,[32+esp]
+ mov eax,DWORD [esi]
+ mov ecx,ebx
+ xor edx,edx
+align 16
+L$014sub:
+ sbb eax,DWORD [edx*4+ebp]
+ mov DWORD [edx*4+edi],eax
+ dec ecx
+ mov eax,DWORD [4+edx*4+esi]
+ lea edx,[1+edx]
+ jge NEAR L$014sub
+ sbb eax,0
+align 16
+L$015copy:
+ mov edx,DWORD [ebx*4+esi]
+ mov ebp,DWORD [ebx*4+edi]
+ xor edx,ebp
+ and edx,eax
+ xor edx,ebp
+ mov DWORD [ebx*4+esi],ecx
+ mov DWORD [ebx*4+edi],edx
+ dec ebx
+ jge NEAR L$015copy
+ mov esp,DWORD [24+esp]
+ mov eax,1
+L$000just_leave:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+db 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
+db 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
+db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
+db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
+db 111,114,103,62,0
+segment .bss
+common _OPENSSL_ia32cap_P 16
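bn_mul_mont above is a word-serial Montgomery multiplication, rp = ap * bp * R^-1 mod np with R = 2^(32*num), taking an SSE2/PMULUDQ path when bit 26 of OPENSSL_ia32cap_P is set and a scalar path otherwise. The following is only a rough C model of the same computation for reference: bn_mul_mont_ref is an invented name, the assembly's num >= 4 requirement and return value are omitted, and n0 points at -np[0]^-1 mod 2^32 precomputed by the caller, as for the assembly entry point.

#include <stdint.h>
#include <string.h>

void bn_mul_mont_ref(uint32_t *rp, const uint32_t *ap, const uint32_t *bp,
                     const uint32_t *np, const uint32_t *n0, int num)
{
    uint32_t t[num + 2];
    memset(t, 0, sizeof(t));

    for (int i = 0; i < num; i++) {
        /* t += ap * bp[i] */
        uint64_t carry = 0;
        for (int j = 0; j < num; j++) {
            carry += (uint64_t)ap[j] * bp[i] + t[j];
            t[j] = (uint32_t)carry;
            carry >>= 32;
        }
        carry += t[num];
        t[num] = (uint32_t)carry;
        t[num + 1] = (uint32_t)(carry >> 32);

        /* t += m * np, then t >>= 32; m is chosen so t[0] cancels    */
        uint32_t m = t[0] * n0[0];
        carry = 0;
        for (int j = 0; j < num; j++) {
            carry += (uint64_t)m * np[j] + t[j];
            if (j > 0)
                t[j - 1] = (uint32_t)carry;
            carry >>= 32;
        }
        carry += t[num];
        t[num - 1] = (uint32_t)carry;
        t[num] = t[num + 1] + (uint32_t)(carry >> 32);
        t[num + 1] = 0;
    }

    /* conditional final subtraction, as in the asm's common_tail     */
    uint32_t d[num];
    uint64_t borrow = 0;
    for (int j = 0; j < num; j++) {
        uint64_t v = (uint64_t)t[j] - np[j] - borrow;
        d[j] = (uint32_t)v;
        borrow = (v >> 63) & 1;
    }
    int use_d = (t[num] != 0) || borrow == 0;
    for (int j = 0; j < num; j++)
        rp[j] = use_d ? d[j] : t[j];
}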
diff --git a/win-x86/crypto/cpu-x86-asm.asm b/win-x86/crypto/cpu-x86-asm.asm
new file mode 100644
index 0000000..4317a73
--- /dev/null
+++ b/win-x86/crypto/cpu-x86-asm.asm
@@ -0,0 +1,303 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+%ifdef __YASM_VERSION_ID__
+%if __YASM_VERSION_ID__ < 01010000h
+%error yasm version 1.1.0 or later needed.
+%endif
+; Yasm automatically includes .00 and complains about redefining it.
+; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
+%else
+$@feat.00 equ 1
+%endif
+section .text code align=64
+%else
+section .text code
+%endif
+global _OPENSSL_ia32_cpuid
+align 16
+_OPENSSL_ia32_cpuid:
+L$_OPENSSL_ia32_cpuid_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ xor edx,edx
+ pushfd
+ pop eax
+ mov ecx,eax
+ xor eax,2097152
+ push eax
+ popfd
+ pushfd
+ pop eax
+ xor ecx,eax
+ xor eax,eax
+ bt ecx,21
+ jnc NEAR L$000nocpuid
+ mov esi,DWORD [20+esp]
+ mov DWORD [8+esi],eax
+ cpuid
+ mov edi,eax
+ xor eax,eax
+ cmp ebx,1970169159
+ setne al
+ mov ebp,eax
+ cmp edx,1231384169
+ setne al
+ or ebp,eax
+ cmp ecx,1818588270
+ setne al
+ or ebp,eax
+ jz NEAR L$001intel
+ cmp ebx,1752462657
+ setne al
+ mov esi,eax
+ cmp edx,1769238117
+ setne al
+ or esi,eax
+ cmp ecx,1145913699
+ setne al
+ or esi,eax
+ jnz NEAR L$001intel
+ mov eax,2147483648
+ cpuid
+ cmp eax,2147483649
+ jb NEAR L$001intel
+ mov esi,eax
+ mov eax,2147483649
+ cpuid
+ or ebp,ecx
+ and ebp,2049
+ cmp esi,2147483656
+ jb NEAR L$001intel
+ mov eax,2147483656
+ cpuid
+ movzx esi,cl
+ inc esi
+ mov eax,1
+ xor ecx,ecx
+ cpuid
+ bt edx,28
+ jnc NEAR L$002generic
+ shr ebx,16
+ and ebx,255
+ cmp ebx,esi
+ ja NEAR L$002generic
+ and edx,4026531839
+ jmp NEAR L$002generic
+L$001intel:
+ cmp edi,7
+ jb NEAR L$003cacheinfo
+ mov esi,DWORD [20+esp]
+ mov eax,7
+ xor ecx,ecx
+ cpuid
+ mov DWORD [8+esi],ebx
+L$003cacheinfo:
+ cmp edi,4
+ mov edi,-1
+ jb NEAR L$004nocacheinfo
+ mov eax,4
+ mov ecx,0
+ cpuid
+ mov edi,eax
+ shr edi,14
+ and edi,4095
+L$004nocacheinfo:
+ mov eax,1
+ xor ecx,ecx
+ cpuid
+ and edx,3220176895
+ cmp ebp,0
+ jne NEAR L$005notintel
+ or edx,1073741824
+L$005notintel:
+ bt edx,28
+ jnc NEAR L$002generic
+ and edx,4026531839
+ cmp edi,0
+ je NEAR L$002generic
+ or edx,268435456
+ shr ebx,16
+ cmp bl,1
+ ja NEAR L$002generic
+ and edx,4026531839
+L$002generic:
+ and ebp,2048
+ and ecx,4294965247
+ mov esi,edx
+ or ebp,ecx
+ bt ecx,27
+ jnc NEAR L$006clear_avx
+ xor ecx,ecx
+db 15,1,208
+ and eax,6
+ cmp eax,6
+ je NEAR L$007done
+ cmp eax,2
+ je NEAR L$006clear_avx
+L$008clear_xmm:
+ and ebp,4261412861
+ and esi,4278190079
+L$006clear_avx:
+ and ebp,4026525695
+ mov edi,DWORD [20+esp]
+ and DWORD [8+edi],4294967263
+L$007done:
+ mov eax,esi
+ mov edx,ebp
+L$000nocpuid:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+;extern _OPENSSL_ia32cap_P
+global _OPENSSL_rdtsc
+align 16
+_OPENSSL_rdtsc:
+L$_OPENSSL_rdtsc_begin:
+ xor eax,eax
+ xor edx,edx
+ lea ecx,[_OPENSSL_ia32cap_P]
+ bt DWORD [ecx],4
+ jnc NEAR L$009notsc
+ rdtsc
+L$009notsc:
+ ret
+global _OPENSSL_instrument_halt
+align 16
+_OPENSSL_instrument_halt:
+L$_OPENSSL_instrument_halt_begin:
+ lea ecx,[_OPENSSL_ia32cap_P]
+ bt DWORD [ecx],4
+ jnc NEAR L$010nohalt
+dd 2421723150
+ and eax,3
+ jnz NEAR L$010nohalt
+ pushfd
+ pop eax
+ bt eax,9
+ jnc NEAR L$010nohalt
+ rdtsc
+ push edx
+ push eax
+ hlt
+ rdtsc
+ sub eax,DWORD [esp]
+ sbb edx,DWORD [4+esp]
+ add esp,8
+ ret
+L$010nohalt:
+ xor eax,eax
+ xor edx,edx
+ ret
+global _OPENSSL_far_spin
+align 16
+_OPENSSL_far_spin:
+L$_OPENSSL_far_spin_begin:
+ pushfd
+ pop eax
+ bt eax,9
+ jnc NEAR L$011nospin
+ mov eax,DWORD [4+esp]
+ mov ecx,DWORD [8+esp]
+dd 2430111262
+ xor eax,eax
+ mov edx,DWORD [ecx]
+ jmp NEAR L$012spin
+align 16
+L$012spin:
+ inc eax
+ cmp edx,DWORD [ecx]
+ je NEAR L$012spin
+dd 529567888
+ ret
+L$011nospin:
+ xor eax,eax
+ xor edx,edx
+ ret
+global _OPENSSL_wipe_cpu
+align 16
+_OPENSSL_wipe_cpu:
+L$_OPENSSL_wipe_cpu_begin:
+ xor eax,eax
+ xor edx,edx
+ lea ecx,[_OPENSSL_ia32cap_P]
+ mov ecx,DWORD [ecx]
+ bt DWORD [ecx],1
+ jnc NEAR L$013no_x87
+ and ecx,83886080
+ cmp ecx,83886080
+ jne NEAR L$014no_sse2
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ pxor xmm6,xmm6
+ pxor xmm7,xmm7
+L$014no_sse2:
+dd 4007259865,4007259865,4007259865,4007259865,2430851995
+L$013no_x87:
+ lea eax,[4+esp]
+ ret
+global _OPENSSL_atomic_add
+align 16
+_OPENSSL_atomic_add:
+L$_OPENSSL_atomic_add_begin:
+ mov edx,DWORD [4+esp]
+ mov ecx,DWORD [8+esp]
+ push ebx
+ nop
+ mov eax,DWORD [edx]
+L$015spin:
+ lea ebx,[ecx*1+eax]
+ nop
+dd 447811568
+ jne NEAR L$015spin
+ mov eax,ebx
+ pop ebx
+ ret
+global _OPENSSL_indirect_call
+align 16
+_OPENSSL_indirect_call:
+L$_OPENSSL_indirect_call_begin:
+ push ebp
+ mov ebp,esp
+ sub esp,28
+ mov ecx,DWORD [12+ebp]
+ mov DWORD [esp],ecx
+ mov edx,DWORD [16+ebp]
+ mov DWORD [4+esp],edx
+ mov eax,DWORD [20+ebp]
+ mov DWORD [8+esp],eax
+ mov eax,DWORD [24+ebp]
+ mov DWORD [12+esp],eax
+ mov eax,DWORD [28+ebp]
+ mov DWORD [16+esp],eax
+ mov eax,DWORD [32+ebp]
+ mov DWORD [20+esp],eax
+ mov eax,DWORD [36+ebp]
+ mov DWORD [24+esp],eax
+ call DWORD [8+ebp]
+ mov esp,ebp
+ pop ebp
+ ret
+global _OPENSSL_ia32_rdrand
+align 16
+_OPENSSL_ia32_rdrand:
+L$_OPENSSL_ia32_rdrand_begin:
+ mov ecx,8
+L$016loop:
+db 15,199,240
+ jc NEAR L$017break
+ loop L$016loop
+L$017break:
+ cmp eax,0
+ cmove eax,ecx
+ ret
+segment .bss
+common _OPENSSL_ia32cap_P 16
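OPENSSL_ia32_cpuid above fills the 16-byte OPENSSL_ia32cap_P capability vector that the other modules in this commit consult: x86-mont.asm tests bit 26 (SSE2) of the first word before taking its PMULUDQ path, and OPENSSL_rdtsc tests bit 4 (TSC). A minimal C sketch of that consumer side follows; only OPENSSL_ia32cap_P is a real symbol, the helper names are illustrative.

#include <stdint.h>

/* Defined above as "common _OPENSSL_ia32cap_P 16".                  */
extern uint32_t OPENSSL_ia32cap_P[4];

static int have_sse2(void)   /* the bit bn_mul_mont tests with bt ...,26 */
{
    return (OPENSSL_ia32cap_P[0] >> 26) & 1;
}

static int have_tsc(void)    /* the bit OPENSSL_rdtsc tests with bt ...,4 */
{
    return (OPENSSL_ia32cap_P[0] >> 4) & 1;
}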
diff --git a/win-x86/crypto/md5/md5-586.asm b/win-x86/crypto/md5/md5-586.asm
new file mode 100644
index 0000000..67ee216
--- /dev/null
+++ b/win-x86/crypto/md5/md5-586.asm
@@ -0,0 +1,691 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+%ifdef __YASM_VERSION_ID__
+%if __YASM_VERSION_ID__ < 01010000h
+%error yasm version 1.1.0 or later needed.
+%endif
+; Yasm automatically includes .00 and complains about redefining it.
+; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
+%else
+$@feat.00 equ 1
+%endif
+section .text code align=64
+%else
+section .text code
+%endif
+global _md5_block_asm_data_order
+align 16
+_md5_block_asm_data_order:
+L$_md5_block_asm_data_order_begin:
+ push esi
+ push edi
+ mov edi,DWORD [12+esp]
+ mov esi,DWORD [16+esp]
+ mov ecx,DWORD [20+esp]
+ push ebp
+ shl ecx,6
+ push ebx
+ add ecx,esi
+ sub ecx,64
+ mov eax,DWORD [edi]
+ push ecx
+ mov ebx,DWORD [4+edi]
+ mov ecx,DWORD [8+edi]
+ mov edx,DWORD [12+edi]
+L$000start:
+ ;
+ ; R0 section
+ mov edi,ecx
+ mov ebp,DWORD [esi]
+ ; R0 0
+ xor edi,edx
+ and edi,ebx
+ lea eax,[3614090360+ebp*1+eax]
+ xor edi,edx
+ add eax,edi
+ mov edi,ebx
+ rol eax,7
+ mov ebp,DWORD [4+esi]
+ add eax,ebx
+ ; R0 1
+ xor edi,ecx
+ and edi,eax
+ lea edx,[3905402710+ebp*1+edx]
+ xor edi,ecx
+ add edx,edi
+ mov edi,eax
+ rol edx,12
+ mov ebp,DWORD [8+esi]
+ add edx,eax
+ ; R0 2
+ xor edi,ebx
+ and edi,edx
+ lea ecx,[606105819+ebp*1+ecx]
+ xor edi,ebx
+ add ecx,edi
+ mov edi,edx
+ rol ecx,17
+ mov ebp,DWORD [12+esi]
+ add ecx,edx
+ ; R0 3
+ xor edi,eax
+ and edi,ecx
+ lea ebx,[3250441966+ebp*1+ebx]
+ xor edi,eax
+ add ebx,edi
+ mov edi,ecx
+ rol ebx,22
+ mov ebp,DWORD [16+esi]
+ add ebx,ecx
+ ; R0 4
+ xor edi,edx
+ and edi,ebx
+ lea eax,[4118548399+ebp*1+eax]
+ xor edi,edx
+ add eax,edi
+ mov edi,ebx
+ rol eax,7
+ mov ebp,DWORD [20+esi]
+ add eax,ebx
+ ; R0 5
+ xor edi,ecx
+ and edi,eax
+ lea edx,[1200080426+ebp*1+edx]
+ xor edi,ecx
+ add edx,edi
+ mov edi,eax
+ rol edx,12
+ mov ebp,DWORD [24+esi]
+ add edx,eax
+ ; R0 6
+ xor edi,ebx
+ and edi,edx
+ lea ecx,[2821735955+ebp*1+ecx]
+ xor edi,ebx
+ add ecx,edi
+ mov edi,edx
+ rol ecx,17
+ mov ebp,DWORD [28+esi]
+ add ecx,edx
+ ; R0 7
+ xor edi,eax
+ and edi,ecx
+ lea ebx,[4249261313+ebp*1+ebx]
+ xor edi,eax
+ add ebx,edi
+ mov edi,ecx
+ rol ebx,22
+ mov ebp,DWORD [32+esi]
+ add ebx,ecx
+ ; R0 8
+ xor edi,edx
+ and edi,ebx
+ lea eax,[1770035416+ebp*1+eax]
+ xor edi,edx
+ add eax,edi
+ mov edi,ebx
+ rol eax,7
+ mov ebp,DWORD [36+esi]
+ add eax,ebx
+ ; R0 9
+ xor edi,ecx
+ and edi,eax
+ lea edx,[2336552879+ebp*1+edx]
+ xor edi,ecx
+ add edx,edi
+ mov edi,eax
+ rol edx,12
+ mov ebp,DWORD [40+esi]
+ add edx,eax
+ ; R0 10
+ xor edi,ebx
+ and edi,edx
+ lea ecx,[4294925233+ebp*1+ecx]
+ xor edi,ebx
+ add ecx,edi
+ mov edi,edx
+ rol ecx,17
+ mov ebp,DWORD [44+esi]
+ add ecx,edx
+ ; R0 11
+ xor edi,eax
+ and edi,ecx
+ lea ebx,[2304563134+ebp*1+ebx]
+ xor edi,eax
+ add ebx,edi
+ mov edi,ecx
+ rol ebx,22
+ mov ebp,DWORD [48+esi]
+ add ebx,ecx
+ ; R0 12
+ xor edi,edx
+ and edi,ebx
+ lea eax,[1804603682+ebp*1+eax]
+ xor edi,edx
+ add eax,edi
+ mov edi,ebx
+ rol eax,7
+ mov ebp,DWORD [52+esi]
+ add eax,ebx
+ ; R0 13
+ xor edi,ecx
+ and edi,eax
+ lea edx,[4254626195+ebp*1+edx]
+ xor edi,ecx
+ add edx,edi
+ mov edi,eax
+ rol edx,12
+ mov ebp,DWORD [56+esi]
+ add edx,eax
+ ; R0 14
+ xor edi,ebx
+ and edi,edx
+ lea ecx,[2792965006+ebp*1+ecx]
+ xor edi,ebx
+ add ecx,edi
+ mov edi,edx
+ rol ecx,17
+ mov ebp,DWORD [60+esi]
+ add ecx,edx
+ ; R0 15
+ xor edi,eax
+ and edi,ecx
+ lea ebx,[1236535329+ebp*1+ebx]
+ xor edi,eax
+ add ebx,edi
+ mov edi,ecx
+ rol ebx,22
+ mov ebp,DWORD [4+esi]
+ add ebx,ecx
+ ;
+ ; R1 section
+ ; R1 16
+ lea eax,[4129170786+ebp*1+eax]
+ xor edi,ebx
+ and edi,edx
+ mov ebp,DWORD [24+esi]
+ xor edi,ecx
+ add eax,edi
+ mov edi,ebx
+ rol eax,5
+ add eax,ebx
+ ; R1 17
+ lea edx,[3225465664+ebp*1+edx]
+ xor edi,eax
+ and edi,ecx
+ mov ebp,DWORD [44+esi]
+ xor edi,ebx
+ add edx,edi
+ mov edi,eax
+ rol edx,9
+ add edx,eax
+ ; R1 18
+ lea ecx,[643717713+ebp*1+ecx]
+ xor edi,edx
+ and edi,ebx
+ mov ebp,DWORD [esi]
+ xor edi,eax
+ add ecx,edi
+ mov edi,edx
+ rol ecx,14
+ add ecx,edx
+ ; R1 19
+ lea ebx,[3921069994+ebp*1+ebx]
+ xor edi,ecx
+ and edi,eax
+ mov ebp,DWORD [20+esi]
+ xor edi,edx
+ add ebx,edi
+ mov edi,ecx
+ rol ebx,20
+ add ebx,ecx
+ ; R1 20
+ lea eax,[3593408605+ebp*1+eax]
+ xor edi,ebx
+ and edi,edx
+ mov ebp,DWORD [40+esi]
+ xor edi,ecx
+ add eax,edi
+ mov edi,ebx
+ rol eax,5
+ add eax,ebx
+ ; R1 21
+ lea edx,[38016083+ebp*1+edx]
+ xor edi,eax
+ and edi,ecx
+ mov ebp,DWORD [60+esi]
+ xor edi,ebx
+ add edx,edi
+ mov edi,eax
+ rol edx,9
+ add edx,eax
+ ; R1 22
+ lea ecx,[3634488961+ebp*1+ecx]
+ xor edi,edx
+ and edi,ebx
+ mov ebp,DWORD [16+esi]
+ xor edi,eax
+ add ecx,edi
+ mov edi,edx
+ rol ecx,14
+ add ecx,edx
+ ; R1 23
+ lea ebx,[3889429448+ebp*1+ebx]
+ xor edi,ecx
+ and edi,eax
+ mov ebp,DWORD [36+esi]
+ xor edi,edx
+ add ebx,edi
+ mov edi,ecx
+ rol ebx,20
+ add ebx,ecx
+ ; R1 24
+ lea eax,[568446438+ebp*1+eax]
+ xor edi,ebx
+ and edi,edx
+ mov ebp,DWORD [56+esi]
+ xor edi,ecx
+ add eax,edi
+ mov edi,ebx
+ rol eax,5
+ add eax,ebx
+ ; R1 25
+ lea edx,[3275163606+ebp*1+edx]
+ xor edi,eax
+ and edi,ecx
+ mov ebp,DWORD [12+esi]
+ xor edi,ebx
+ add edx,edi
+ mov edi,eax
+ rol edx,9
+ add edx,eax
+ ; R1 26
+ lea ecx,[4107603335+ebp*1+ecx]
+ xor edi,edx
+ and edi,ebx
+ mov ebp,DWORD [32+esi]
+ xor edi,eax
+ add ecx,edi
+ mov edi,edx
+ rol ecx,14
+ add ecx,edx
+ ; R1 27
+ lea ebx,[1163531501+ebp*1+ebx]
+ xor edi,ecx
+ and edi,eax
+ mov ebp,DWORD [52+esi]
+ xor edi,edx
+ add ebx,edi
+ mov edi,ecx
+ rol ebx,20
+ add ebx,ecx
+ ; R1 28
+ lea eax,[2850285829+ebp*1+eax]
+ xor edi,ebx
+ and edi,edx
+ mov ebp,DWORD [8+esi]
+ xor edi,ecx
+ add eax,edi
+ mov edi,ebx
+ rol eax,5
+ add eax,ebx
+ ; R1 29
+ lea edx,[4243563512+ebp*1+edx]
+ xor edi,eax
+ and edi,ecx
+ mov ebp,DWORD [28+esi]
+ xor edi,ebx
+ add edx,edi
+ mov edi,eax
+ rol edx,9
+ add edx,eax
+ ; R1 30
+ lea ecx,[1735328473+ebp*1+ecx]
+ xor edi,edx
+ and edi,ebx
+ mov ebp,DWORD [48+esi]
+ xor edi,eax
+ add ecx,edi
+ mov edi,edx
+ rol ecx,14
+ add ecx,edx
+ ; R1 31
+ lea ebx,[2368359562+ebp*1+ebx]
+ xor edi,ecx
+ and edi,eax
+ mov ebp,DWORD [20+esi]
+ xor edi,edx
+ add ebx,edi
+ mov edi,ecx
+ rol ebx,20
+ add ebx,ecx
+ ;
+ ; R2 section
+ ; R2 32
+ xor edi,edx
+ xor edi,ebx
+ lea eax,[4294588738+ebp*1+eax]
+ add eax,edi
+ rol eax,4
+ mov ebp,DWORD [32+esi]
+ mov edi,ebx
+ ; R2 33
+ lea edx,[2272392833+ebp*1+edx]
+ add eax,ebx
+ xor edi,ecx
+ xor edi,eax
+ mov ebp,DWORD [44+esi]
+ add edx,edi
+ mov edi,eax
+ rol edx,11
+ add edx,eax
+ ; R2 34
+ xor edi,ebx
+ xor edi,edx
+ lea ecx,[1839030562+ebp*1+ecx]
+ add ecx,edi
+ rol ecx,16
+ mov ebp,DWORD [56+esi]
+ mov edi,edx
+ ; R2 35
+ lea ebx,[4259657740+ebp*1+ebx]
+ add ecx,edx
+ xor edi,eax
+ xor edi,ecx
+ mov ebp,DWORD [4+esi]
+ add ebx,edi
+ mov edi,ecx
+ rol ebx,23
+ add ebx,ecx
+ ; R2 36
+ xor edi,edx
+ xor edi,ebx
+ lea eax,[2763975236+ebp*1+eax]
+ add eax,edi
+ rol eax,4
+ mov ebp,DWORD [16+esi]
+ mov edi,ebx
+ ; R2 37
+ lea edx,[1272893353+ebp*1+edx]
+ add eax,ebx
+ xor edi,ecx
+ xor edi,eax
+ mov ebp,DWORD [28+esi]
+ add edx,edi
+ mov edi,eax
+ rol edx,11
+ add edx,eax
+ ; R2 38
+ xor edi,ebx
+ xor edi,edx
+ lea ecx,[4139469664+ebp*1+ecx]
+ add ecx,edi
+ rol ecx,16
+ mov ebp,DWORD [40+esi]
+ mov edi,edx
+ ; R2 39
+ lea ebx,[3200236656+ebp*1+ebx]
+ add ecx,edx
+ xor edi,eax
+ xor edi,ecx
+ mov ebp,DWORD [52+esi]
+ add ebx,edi
+ mov edi,ecx
+ rol ebx,23
+ add ebx,ecx
+ ; R2 40
+ xor edi,edx
+ xor edi,ebx
+ lea eax,[681279174+ebp*1+eax]
+ add eax,edi
+ rol eax,4
+ mov ebp,DWORD [esi]
+ mov edi,ebx
+ ; R2 41
+ lea edx,[3936430074+ebp*1+edx]
+ add eax,ebx
+ xor edi,ecx
+ xor edi,eax
+ mov ebp,DWORD [12+esi]
+ add edx,edi
+ mov edi,eax
+ rol edx,11
+ add edx,eax
+ ; R2 42
+ xor edi,ebx
+ xor edi,edx
+ lea ecx,[3572445317+ebp*1+ecx]
+ add ecx,edi
+ rol ecx,16
+ mov ebp,DWORD [24+esi]
+ mov edi,edx
+ ; R2 43
+ lea ebx,[76029189+ebp*1+ebx]
+ add ecx,edx
+ xor edi,eax
+ xor edi,ecx
+ mov ebp,DWORD [36+esi]
+ add ebx,edi
+ mov edi,ecx
+ rol ebx,23
+ add ebx,ecx
+ ; R2 44
+ xor edi,edx
+ xor edi,ebx
+ lea eax,[3654602809+ebp*1+eax]
+ add eax,edi
+ rol eax,4
+ mov ebp,DWORD [48+esi]
+ mov edi,ebx
+ ; R2 45
+ lea edx,[3873151461+ebp*1+edx]
+ add eax,ebx
+ xor edi,ecx
+ xor edi,eax
+ mov ebp,DWORD [60+esi]
+ add edx,edi
+ mov edi,eax
+ rol edx,11
+ add edx,eax
+ ; R2 46
+ xor edi,ebx
+ xor edi,edx
+ lea ecx,[530742520+ebp*1+ecx]
+ add ecx,edi
+ rol ecx,16
+ mov ebp,DWORD [8+esi]
+ mov edi,edx
+ ; R2 47
+ lea ebx,[3299628645+ebp*1+ebx]
+ add ecx,edx
+ xor edi,eax
+ xor edi,ecx
+ mov ebp,DWORD [esi]
+ add ebx,edi
+ mov edi,-1
+ rol ebx,23
+ add ebx,ecx
+ ;
+ ; R3 section
+ ; R3 48
+ xor edi,edx
+ or edi,ebx
+ lea eax,[4096336452+ebp*1+eax]
+ xor edi,ecx
+ mov ebp,DWORD [28+esi]
+ add eax,edi
+ mov edi,-1
+ rol eax,6
+ xor edi,ecx
+ add eax,ebx
+ ; R3 49
+ or edi,eax
+ lea edx,[1126891415+ebp*1+edx]
+ xor edi,ebx
+ mov ebp,DWORD [56+esi]
+ add edx,edi
+ mov edi,-1
+ rol edx,10
+ xor edi,ebx
+ add edx,eax
+ ; R3 50
+ or edi,edx
+ lea ecx,[2878612391+ebp*1+ecx]
+ xor edi,eax
+ mov ebp,DWORD [20+esi]
+ add ecx,edi
+ mov edi,-1
+ rol ecx,15
+ xor edi,eax
+ add ecx,edx
+ ; R3 51
+ or edi,ecx
+ lea ebx,[4237533241+ebp*1+ebx]
+ xor edi,edx
+ mov ebp,DWORD [48+esi]
+ add ebx,edi
+ mov edi,-1
+ rol ebx,21
+ xor edi,edx
+ add ebx,ecx
+ ; R3 52
+ or edi,ebx
+ lea eax,[1700485571+ebp*1+eax]
+ xor edi,ecx
+ mov ebp,DWORD [12+esi]
+ add eax,edi
+ mov edi,-1
+ rol eax,6
+ xor edi,ecx
+ add eax,ebx
+ ; R3 53
+ or edi,eax
+ lea edx,[2399980690+ebp*1+edx]
+ xor edi,ebx
+ mov ebp,DWORD [40+esi]
+ add edx,edi
+ mov edi,-1
+ rol edx,10
+ xor edi,ebx
+ add edx,eax
+ ; R3 54
+ or edi,edx
+ lea ecx,[4293915773+ebp*1+ecx]
+ xor edi,eax
+ mov ebp,DWORD [4+esi]
+ add ecx,edi
+ mov edi,-1
+ rol ecx,15
+ xor edi,eax
+ add ecx,edx
+ ; R3 55
+ or edi,ecx
+ lea ebx,[2240044497+ebp*1+ebx]
+ xor edi,edx
+ mov ebp,DWORD [32+esi]
+ add ebx,edi
+ mov edi,-1
+ rol ebx,21
+ xor edi,edx
+ add ebx,ecx
+ ; R3 56
+ or edi,ebx
+ lea eax,[1873313359+ebp*1+eax]
+ xor edi,ecx
+ mov ebp,DWORD [60+esi]
+ add eax,edi
+ mov edi,-1
+ rol eax,6
+ xor edi,ecx
+ add eax,ebx
+ ; R3 57
+ or edi,eax
+ lea edx,[4264355552+ebp*1+edx]
+ xor edi,ebx
+ mov ebp,DWORD [24+esi]
+ add edx,edi
+ mov edi,-1
+ rol edx,10
+ xor edi,ebx
+ add edx,eax
+ ; R3 58
+ or edi,edx
+ lea ecx,[2734768916+ebp*1+ecx]
+ xor edi,eax
+ mov ebp,DWORD [52+esi]
+ add ecx,edi
+ mov edi,-1
+ rol ecx,15
+ xor edi,eax
+ add ecx,edx
+ ; R3 59
+ or edi,ecx
+ lea ebx,[1309151649+ebp*1+ebx]
+ xor edi,edx
+ mov ebp,DWORD [16+esi]
+ add ebx,edi
+ mov edi,-1
+ rol ebx,21
+ xor edi,edx
+ add ebx,ecx
+ ; R3 60
+ or edi,ebx
+ lea eax,[4149444226+ebp*1+eax]
+ xor edi,ecx
+ mov ebp,DWORD [44+esi]
+ add eax,edi
+ mov edi,-1
+ rol eax,6
+ xor edi,ecx
+ add eax,ebx
+ ; R3 61
+ or edi,eax
+ lea edx,[3174756917+ebp*1+edx]
+ xor edi,ebx
+ mov ebp,DWORD [8+esi]
+ add edx,edi
+ mov edi,-1
+ rol edx,10
+ xor edi,ebx
+ add edx,eax
+ ; R3 62
+ or edi,edx
+ lea ecx,[718787259+ebp*1+ecx]
+ xor edi,eax
+ mov ebp,DWORD [36+esi]
+ add ecx,edi
+ mov edi,-1
+ rol ecx,15
+ xor edi,eax
+ add ecx,edx
+ ; R3 63
+ or edi,ecx
+ lea ebx,[3951481745+ebp*1+ebx]
+ xor edi,edx
+ mov ebp,DWORD [24+esp]
+ add ebx,edi
+ add esi,64
+ rol ebx,21
+ mov edi,DWORD [ebp]
+ add ebx,ecx
+ add eax,edi
+ mov edi,DWORD [4+ebp]
+ add ebx,edi
+ mov edi,DWORD [8+ebp]
+ add ecx,edi
+ mov edi,DWORD [12+ebp]
+ add edx,edi
+ mov DWORD [ebp],eax
+ mov DWORD [4+ebp],ebx
+ mov edi,DWORD [esp]
+ mov DWORD [8+ebp],ecx
+ mov DWORD [12+ebp],edx
+ cmp edi,esi
+ jae NEAR L$000start
+ pop eax
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
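Each "R0 n" block in md5_block_asm_data_order above is one round-0 MD5 step: F(b,c,d) is computed as ((c ^ d) & b) ^ d, and the message word plus round constant are folded in with lea (3614090360 is 0xd76aa478, the first MD5 T constant). A small C sketch of one such step, with invented helper names:

#include <stdint.h>

static uint32_t rotl32(uint32_t v, int s)
{
    return (v << s) | (v >> (32 - s));
}

/* F(b,c,d) as the assembly computes it: pick c where b is 1, else d. */
static uint32_t md5_f(uint32_t b, uint32_t c, uint32_t d)
{
    return ((c ^ d) & b) ^ d;
}

/* One round-0 step: a = b + ((a + F(b,c,d) + X[k] + T) <<< s).
 * "R0 0" above uses X[0], T = 0xd76aa478 and s = 7.                  */
static uint32_t md5_r0_step(uint32_t a, uint32_t b, uint32_t c, uint32_t d,
                            uint32_t xk, uint32_t t, int s)
{
    return b + rotl32(a + md5_f(b, c, d) + xk + t, s);
}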
diff --git a/win-x86/crypto/modes/ghash-x86.asm b/win-x86/crypto/modes/ghash-x86.asm
new file mode 100644
index 0000000..eb493ac
--- /dev/null
+++ b/win-x86/crypto/modes/ghash-x86.asm
@@ -0,0 +1,1265 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+%ifdef __YASM_VERSION_ID__
+%if __YASM_VERSION_ID__ < 01010000h
+%error yasm version 1.1.0 or later needed.
+%endif
+; Yasm automatically includes .00 and complains about redefining it.
+; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
+%else
+$@feat.00 equ 1
+%endif
+section .text code align=64
+%else
+section .text code
+%endif
+global _gcm_gmult_4bit_x86
+align 16
+_gcm_gmult_4bit_x86:
+L$_gcm_gmult_4bit_x86_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ sub esp,84
+ mov edi,DWORD [104+esp]
+ mov esi,DWORD [108+esp]
+ mov ebp,DWORD [edi]
+ mov edx,DWORD [4+edi]
+ mov ecx,DWORD [8+edi]
+ mov ebx,DWORD [12+edi]
+ mov DWORD [16+esp],0
+ mov DWORD [20+esp],471859200
+ mov DWORD [24+esp],943718400
+ mov DWORD [28+esp],610271232
+ mov DWORD [32+esp],1887436800
+ mov DWORD [36+esp],1822425088
+ mov DWORD [40+esp],1220542464
+ mov DWORD [44+esp],1423966208
+ mov DWORD [48+esp],3774873600
+ mov DWORD [52+esp],4246732800
+ mov DWORD [56+esp],3644850176
+ mov DWORD [60+esp],3311403008
+ mov DWORD [64+esp],2441084928
+ mov DWORD [68+esp],2376073216
+ mov DWORD [72+esp],2847932416
+ mov DWORD [76+esp],3051356160
+ mov DWORD [esp],ebp
+ mov DWORD [4+esp],edx
+ mov DWORD [8+esp],ecx
+ mov DWORD [12+esp],ebx
+ shr ebx,20
+ and ebx,240
+ mov ebp,DWORD [4+ebx*1+esi]
+ mov edx,DWORD [ebx*1+esi]
+ mov ecx,DWORD [12+ebx*1+esi]
+ mov ebx,DWORD [8+ebx*1+esi]
+ xor eax,eax
+ mov edi,15
+ jmp NEAR L$000x86_loop
+align 16
+L$000x86_loop:
+ mov al,bl
+ shrd ebx,ecx,4
+ and al,15
+ shrd ecx,edx,4
+ shrd edx,ebp,4
+ shr ebp,4
+ xor ebp,DWORD [16+eax*4+esp]
+ mov al,BYTE [edi*1+esp]
+ and al,240
+ xor ebx,DWORD [8+eax*1+esi]
+ xor ecx,DWORD [12+eax*1+esi]
+ xor edx,DWORD [eax*1+esi]
+ xor ebp,DWORD [4+eax*1+esi]
+ dec edi
+ js NEAR L$001x86_break
+ mov al,bl
+ shrd ebx,ecx,4
+ and al,15
+ shrd ecx,edx,4
+ shrd edx,ebp,4
+ shr ebp,4
+ xor ebp,DWORD [16+eax*4+esp]
+ mov al,BYTE [edi*1+esp]
+ shl al,4
+ xor ebx,DWORD [8+eax*1+esi]
+ xor ecx,DWORD [12+eax*1+esi]
+ xor edx,DWORD [eax*1+esi]
+ xor ebp,DWORD [4+eax*1+esi]
+ jmp NEAR L$000x86_loop
+align 16
+L$001x86_break:
+ bswap ebx
+ bswap ecx
+ bswap edx
+ bswap ebp
+ mov edi,DWORD [104+esp]
+ mov DWORD [12+edi],ebx
+ mov DWORD [8+edi],ecx
+ mov DWORD [4+edi],edx
+ mov DWORD [edi],ebp
+ add esp,84
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _gcm_ghash_4bit_x86
+align 16
+_gcm_ghash_4bit_x86:
+L$_gcm_ghash_4bit_x86_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ sub esp,84
+ mov ebx,DWORD [104+esp]
+ mov esi,DWORD [108+esp]
+ mov edi,DWORD [112+esp]
+ mov ecx,DWORD [116+esp]
+ add ecx,edi
+ mov DWORD [116+esp],ecx
+ mov ebp,DWORD [ebx]
+ mov edx,DWORD [4+ebx]
+ mov ecx,DWORD [8+ebx]
+ mov ebx,DWORD [12+ebx]
+ mov DWORD [16+esp],0
+ mov DWORD [20+esp],471859200
+ mov DWORD [24+esp],943718400
+ mov DWORD [28+esp],610271232
+ mov DWORD [32+esp],1887436800
+ mov DWORD [36+esp],1822425088
+ mov DWORD [40+esp],1220542464
+ mov DWORD [44+esp],1423966208
+ mov DWORD [48+esp],3774873600
+ mov DWORD [52+esp],4246732800
+ mov DWORD [56+esp],3644850176
+ mov DWORD [60+esp],3311403008
+ mov DWORD [64+esp],2441084928
+ mov DWORD [68+esp],2376073216
+ mov DWORD [72+esp],2847932416
+ mov DWORD [76+esp],3051356160
+align 16
+L$002x86_outer_loop:
+ xor ebx,DWORD [12+edi]
+ xor ecx,DWORD [8+edi]
+ xor edx,DWORD [4+edi]
+ xor ebp,DWORD [edi]
+ mov DWORD [12+esp],ebx
+ mov DWORD [8+esp],ecx
+ mov DWORD [4+esp],edx
+ mov DWORD [esp],ebp
+ shr ebx,20
+ and ebx,240
+ mov ebp,DWORD [4+ebx*1+esi]
+ mov edx,DWORD [ebx*1+esi]
+ mov ecx,DWORD [12+ebx*1+esi]
+ mov ebx,DWORD [8+ebx*1+esi]
+ xor eax,eax
+ mov edi,15
+ jmp NEAR L$003x86_loop
+align 16
+L$003x86_loop:
+ mov al,bl
+ shrd ebx,ecx,4
+ and al,15
+ shrd ecx,edx,4
+ shrd edx,ebp,4
+ shr ebp,4
+ xor ebp,DWORD [16+eax*4+esp]
+ mov al,BYTE [edi*1+esp]
+ and al,240
+ xor ebx,DWORD [8+eax*1+esi]
+ xor ecx,DWORD [12+eax*1+esi]
+ xor edx,DWORD [eax*1+esi]
+ xor ebp,DWORD [4+eax*1+esi]
+ dec edi
+ js NEAR L$004x86_break
+ mov al,bl
+ shrd ebx,ecx,4
+ and al,15
+ shrd ecx,edx,4
+ shrd edx,ebp,4
+ shr ebp,4
+ xor ebp,DWORD [16+eax*4+esp]
+ mov al,BYTE [edi*1+esp]
+ shl al,4
+ xor ebx,DWORD [8+eax*1+esi]
+ xor ecx,DWORD [12+eax*1+esi]
+ xor edx,DWORD [eax*1+esi]
+ xor ebp,DWORD [4+eax*1+esi]
+ jmp NEAR L$003x86_loop
+align 16
+L$004x86_break:
+ bswap ebx
+ bswap ecx
+ bswap edx
+ bswap ebp
+ mov edi,DWORD [112+esp]
+ lea edi,[16+edi]
+ cmp edi,DWORD [116+esp]
+ mov DWORD [112+esp],edi
+ jb NEAR L$002x86_outer_loop
+ mov edi,DWORD [104+esp]
+ mov DWORD [12+edi],ebx
+ mov DWORD [8+edi],ecx
+ mov DWORD [4+edi],edx
+ mov DWORD [edi],ebp
+ add esp,84
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
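The 4-bit table routines above (gcm_gmult_4bit_x86 and gcm_ghash_4bit_x86, like the MMX and CLMUL variants further down) all compute the GF(2^128) product defined by the GCM spec; the precomputed tables and the rem_4bit/rem_8bit constants only replace the bit-serial reduction with nibble-wide lookups. For reference, the plain bit-serial form (SP 800-38D, Algorithm 1) looks roughly like this in C; u128 and gf128_mul are illustrative names, not BoringSSL's API.

#include <stdint.h>

/* 128-bit value in GCM's byte order: hi holds bytes 0-7, lo bytes 8-15. */
typedef struct { uint64_t hi, lo; } u128;

/* Z = X * Y in GF(2^128), bit by bit.  The reduction polynomial
 * R = 0xe1 || 0^120 is the same one folded into the lookup tables.  */
static u128 gf128_mul(u128 x, u128 y)
{
    u128 z = {0, 0}, v = y;
    for (int i = 0; i < 128; i++) {
        uint64_t xbit = (i < 64) ? (x.hi >> (63 - i)) & 1
                                 : (x.lo >> (127 - i)) & 1;
        if (xbit) {
            z.hi ^= v.hi;
            z.lo ^= v.lo;
        }
        uint64_t lsb = v.lo & 1;
        v.lo = (v.lo >> 1) | (v.hi << 63);
        v.hi >>= 1;
        if (lsb)
            v.hi ^= 0xe100000000000000ULL;
    }
    return z;
}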
+global _gcm_gmult_4bit_mmx
+align 16
+_gcm_gmult_4bit_mmx:
+L$_gcm_gmult_4bit_mmx_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov edi,DWORD [20+esp]
+ mov esi,DWORD [24+esp]
+ call L$005pic_point
+L$005pic_point:
+ pop eax
+ lea eax,[(L$rem_4bit-L$005pic_point)+eax]
+ movzx ebx,BYTE [15+edi]
+ xor ecx,ecx
+ mov edx,ebx
+ mov cl,dl
+ mov ebp,14
+ shl cl,4
+ and edx,240
+ movq mm0,[8+ecx*1+esi]
+ movq mm1,[ecx*1+esi]
+ movd ebx,mm0
+ jmp NEAR L$006mmx_loop
+align 16
+L$006mmx_loop:
+ psrlq mm0,4
+ and ebx,15
+ movq mm2,mm1
+ psrlq mm1,4
+ pxor mm0,[8+edx*1+esi]
+ mov cl,BYTE [ebp*1+edi]
+ psllq mm2,60
+ pxor mm1,[ebx*8+eax]
+ dec ebp
+ movd ebx,mm0
+ pxor mm1,[edx*1+esi]
+ mov edx,ecx
+ pxor mm0,mm2
+ js NEAR L$007mmx_break
+ shl cl,4
+ and ebx,15
+ psrlq mm0,4
+ and edx,240
+ movq mm2,mm1
+ psrlq mm1,4
+ pxor mm0,[8+ecx*1+esi]
+ psllq mm2,60
+ pxor mm1,[ebx*8+eax]
+ movd ebx,mm0
+ pxor mm1,[ecx*1+esi]
+ pxor mm0,mm2
+ jmp NEAR L$006mmx_loop
+align 16
+L$007mmx_break:
+ shl cl,4
+ and ebx,15
+ psrlq mm0,4
+ and edx,240
+ movq mm2,mm1
+ psrlq mm1,4
+ pxor mm0,[8+ecx*1+esi]
+ psllq mm2,60
+ pxor mm1,[ebx*8+eax]
+ movd ebx,mm0
+ pxor mm1,[ecx*1+esi]
+ pxor mm0,mm2
+ psrlq mm0,4
+ and ebx,15
+ movq mm2,mm1
+ psrlq mm1,4
+ pxor mm0,[8+edx*1+esi]
+ psllq mm2,60
+ pxor mm1,[ebx*8+eax]
+ movd ebx,mm0
+ pxor mm1,[edx*1+esi]
+ pxor mm0,mm2
+ psrlq mm0,32
+ movd edx,mm1
+ psrlq mm1,32
+ movd ecx,mm0
+ movd ebp,mm1
+ bswap ebx
+ bswap edx
+ bswap ecx
+ bswap ebp
+ emms
+ mov DWORD [12+edi],ebx
+ mov DWORD [4+edi],edx
+ mov DWORD [8+edi],ecx
+ mov DWORD [edi],ebp
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _gcm_ghash_4bit_mmx
+align 16
+_gcm_ghash_4bit_mmx:
+L$_gcm_ghash_4bit_mmx_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov eax,DWORD [20+esp]
+ mov ebx,DWORD [24+esp]
+ mov ecx,DWORD [28+esp]
+ mov edx,DWORD [32+esp]
+ mov ebp,esp
+ call L$008pic_point
+L$008pic_point:
+ pop esi
+ lea esi,[(L$rem_8bit-L$008pic_point)+esi]
+ sub esp,544
+ and esp,-64
+ sub esp,16
+ add edx,ecx
+ mov DWORD [544+esp],eax
+ mov DWORD [552+esp],edx
+ mov DWORD [556+esp],ebp
+ add ebx,128
+ lea edi,[144+esp]
+ lea ebp,[400+esp]
+ mov edx,DWORD [ebx-120]
+ movq mm0,[ebx-120]
+ movq mm3,[ebx-128]
+ shl edx,4
+ mov BYTE [esp],dl
+ mov edx,DWORD [ebx-104]
+ movq mm2,[ebx-104]
+ movq mm5,[ebx-112]
+ movq [edi-128],mm0
+ psrlq mm0,4
+ movq [edi],mm3
+ movq mm7,mm3
+ psrlq mm3,4
+ shl edx,4
+ mov BYTE [1+esp],dl
+ mov edx,DWORD [ebx-88]
+ movq mm1,[ebx-88]
+ psllq mm7,60
+ movq mm4,[ebx-96]
+ por mm0,mm7
+ movq [edi-120],mm2
+ psrlq mm2,4
+ movq [8+edi],mm5
+ movq mm6,mm5
+ movq [ebp-128],mm0
+ psrlq mm5,4
+ movq [ebp],mm3
+ shl edx,4
+ mov BYTE [2+esp],dl
+ mov edx,DWORD [ebx-72]
+ movq mm0,[ebx-72]
+ psllq mm6,60
+ movq mm3,[ebx-80]
+ por mm2,mm6
+ movq [edi-112],mm1
+ psrlq mm1,4
+ movq [16+edi],mm4
+ movq mm7,mm4
+ movq [ebp-120],mm2
+ psrlq mm4,4
+ movq [8+ebp],mm5
+ shl edx,4
+ mov BYTE [3+esp],dl
+ mov edx,DWORD [ebx-56]
+ movq mm2,[ebx-56]
+ psllq mm7,60
+ movq mm5,[ebx-64]
+ por mm1,mm7
+ movq [edi-104],mm0
+ psrlq mm0,4
+ movq [24+edi],mm3
+ movq mm6,mm3
+ movq [ebp-112],mm1
+ psrlq mm3,4
+ movq [16+ebp],mm4
+ shl edx,4
+ mov BYTE [4+esp],dl
+ mov edx,DWORD [ebx-40]
+ movq mm1,[ebx-40]
+ psllq mm6,60
+ movq mm4,[ebx-48]
+ por mm0,mm6
+ movq [edi-96],mm2
+ psrlq mm2,4
+ movq [32+edi],mm5
+ movq mm7,mm5
+ movq [ebp-104],mm0
+ psrlq mm5,4
+ movq [24+ebp],mm3
+ shl edx,4
+ mov BYTE [5+esp],dl
+ mov edx,DWORD [ebx-24]
+ movq mm0,[ebx-24]
+ psllq mm7,60
+ movq mm3,[ebx-32]
+ por mm2,mm7
+ movq [edi-88],mm1
+ psrlq mm1,4
+ movq [40+edi],mm4
+ movq mm6,mm4
+ movq [ebp-96],mm2
+ psrlq mm4,4
+ movq [32+ebp],mm5
+ shl edx,4
+ mov BYTE [6+esp],dl
+ mov edx,DWORD [ebx-8]
+ movq mm2,[ebx-8]
+ psllq mm6,60
+ movq mm5,[ebx-16]
+ por mm1,mm6
+ movq [edi-80],mm0
+ psrlq mm0,4
+ movq [48+edi],mm3
+ movq mm7,mm3
+ movq [ebp-88],mm1
+ psrlq mm3,4
+ movq [40+ebp],mm4
+ shl edx,4
+ mov BYTE [7+esp],dl
+ mov edx,DWORD [8+ebx]
+ movq mm1,[8+ebx]
+ psllq mm7,60
+ movq mm4,[ebx]
+ por mm0,mm7
+ movq [edi-72],mm2
+ psrlq mm2,4
+ movq [56+edi],mm5
+ movq mm6,mm5
+ movq [ebp-80],mm0
+ psrlq mm5,4
+ movq [48+ebp],mm3
+ shl edx,4
+ mov BYTE [8+esp],dl
+ mov edx,DWORD [24+ebx]
+ movq mm0,[24+ebx]
+ psllq mm6,60
+ movq mm3,[16+ebx]
+ por mm2,mm6
+ movq [edi-64],mm1
+ psrlq mm1,4
+ movq [64+edi],mm4
+ movq mm7,mm4
+ movq [ebp-72],mm2
+ psrlq mm4,4
+ movq [56+ebp],mm5
+ shl edx,4
+ mov BYTE [9+esp],dl
+ mov edx,DWORD [40+ebx]
+ movq mm2,[40+ebx]
+ psllq mm7,60
+ movq mm5,[32+ebx]
+ por mm1,mm7
+ movq [edi-56],mm0
+ psrlq mm0,4
+ movq [72+edi],mm3
+ movq mm6,mm3
+ movq [ebp-64],mm1
+ psrlq mm3,4
+ movq [64+ebp],mm4
+ shl edx,4
+ mov BYTE [10+esp],dl
+ mov edx,DWORD [56+ebx]
+ movq mm1,[56+ebx]
+ psllq mm6,60
+ movq mm4,[48+ebx]
+ por mm0,mm6
+ movq [edi-48],mm2
+ psrlq mm2,4
+ movq [80+edi],mm5
+ movq mm7,mm5
+ movq [ebp-56],mm0
+ psrlq mm5,4
+ movq [72+ebp],mm3
+ shl edx,4
+ mov BYTE [11+esp],dl
+ mov edx,DWORD [72+ebx]
+ movq mm0,[72+ebx]
+ psllq mm7,60
+ movq mm3,[64+ebx]
+ por mm2,mm7
+ movq [edi-40],mm1
+ psrlq mm1,4
+ movq [88+edi],mm4
+ movq mm6,mm4
+ movq [ebp-48],mm2
+ psrlq mm4,4
+ movq [80+ebp],mm5
+ shl edx,4
+ mov BYTE [12+esp],dl
+ mov edx,DWORD [88+ebx]
+ movq mm2,[88+ebx]
+ psllq mm6,60
+ movq mm5,[80+ebx]
+ por mm1,mm6
+ movq [edi-32],mm0
+ psrlq mm0,4
+ movq [96+edi],mm3
+ movq mm7,mm3
+ movq [ebp-40],mm1
+ psrlq mm3,4
+ movq [88+ebp],mm4
+ shl edx,4
+ mov BYTE [13+esp],dl
+ mov edx,DWORD [104+ebx]
+ movq mm1,[104+ebx]
+ psllq mm7,60
+ movq mm4,[96+ebx]
+ por mm0,mm7
+ movq [edi-24],mm2
+ psrlq mm2,4
+ movq [104+edi],mm5
+ movq mm6,mm5
+ movq [ebp-32],mm0
+ psrlq mm5,4
+ movq [96+ebp],mm3
+ shl edx,4
+ mov BYTE [14+esp],dl
+ mov edx,DWORD [120+ebx]
+ movq mm0,[120+ebx]
+ psllq mm6,60
+ movq mm3,[112+ebx]
+ por mm2,mm6
+ movq [edi-16],mm1
+ psrlq mm1,4
+ movq [112+edi],mm4
+ movq mm7,mm4
+ movq [ebp-24],mm2
+ psrlq mm4,4
+ movq [104+ebp],mm5
+ shl edx,4
+ mov BYTE [15+esp],dl
+ psllq mm7,60
+ por mm1,mm7
+ movq [edi-8],mm0
+ psrlq mm0,4
+ movq [120+edi],mm3
+ movq mm6,mm3
+ movq [ebp-16],mm1
+ psrlq mm3,4
+ movq [112+ebp],mm4
+ psllq mm6,60
+ por mm0,mm6
+ movq [ebp-8],mm0
+ movq [120+ebp],mm3
+ movq mm6,[eax]
+ mov ebx,DWORD [8+eax]
+ mov edx,DWORD [12+eax]
+align 16
+L$009outer:
+ xor edx,DWORD [12+ecx]
+ xor ebx,DWORD [8+ecx]
+ pxor mm6,[ecx]
+ lea ecx,[16+ecx]
+ mov DWORD [536+esp],ebx
+ movq [528+esp],mm6
+ mov DWORD [548+esp],ecx
+ xor eax,eax
+ rol edx,8
+ mov al,dl
+ mov ebp,eax
+ and al,15
+ shr ebp,4
+ pxor mm0,mm0
+ rol edx,8
+ pxor mm1,mm1
+ pxor mm2,mm2
+ movq mm7,[16+eax*8+esp]
+ movq mm6,[144+eax*8+esp]
+ mov al,dl
+ movd ebx,mm7
+ psrlq mm7,8
+ movq mm3,mm6
+ mov edi,eax
+ psrlq mm6,8
+ pxor mm7,[272+ebp*8+esp]
+ and al,15
+ psllq mm3,56
+ shr edi,4
+ pxor mm7,[16+eax*8+esp]
+ rol edx,8
+ pxor mm6,[144+eax*8+esp]
+ pxor mm7,mm3
+ pxor mm6,[400+ebp*8+esp]
+ xor bl,BYTE [ebp*1+esp]
+ mov al,dl
+ movd ecx,mm7
+ movzx ebx,bl
+ psrlq mm7,8
+ movq mm3,mm6
+ mov ebp,eax
+ psrlq mm6,8
+ pxor mm7,[272+edi*8+esp]
+ and al,15
+ psllq mm3,56
+ shr ebp,4
+ pinsrw mm2,WORD [ebx*2+esi],2
+ pxor mm7,[16+eax*8+esp]
+ rol edx,8
+ pxor mm6,[144+eax*8+esp]
+ pxor mm7,mm3
+ pxor mm6,[400+edi*8+esp]
+ xor cl,BYTE [edi*1+esp]
+ mov al,dl
+ mov edx,DWORD [536+esp]
+ movd ebx,mm7
+ movzx ecx,cl
+ psrlq mm7,8
+ movq mm3,mm6
+ mov edi,eax
+ psrlq mm6,8
+ pxor mm7,[272+ebp*8+esp]
+ and al,15
+ psllq mm3,56
+ pxor mm6,mm2
+ shr edi,4
+ pinsrw mm1,WORD [ecx*2+esi],2
+ pxor mm7,[16+eax*8+esp]
+ rol edx,8
+ pxor mm6,[144+eax*8+esp]
+ pxor mm7,mm3
+ pxor mm6,[400+ebp*8+esp]
+ xor bl,BYTE [ebp*1+esp]
+ mov al,dl
+ movd ecx,mm7
+ movzx ebx,bl
+ psrlq mm7,8
+ movq mm3,mm6
+ mov ebp,eax
+ psrlq mm6,8
+ pxor mm7,[272+edi*8+esp]
+ and al,15
+ psllq mm3,56
+ pxor mm6,mm1
+ shr ebp,4
+ pinsrw mm0,WORD [ebx*2+esi],2
+ pxor mm7,[16+eax*8+esp]
+ rol edx,8
+ pxor mm6,[144+eax*8+esp]
+ pxor mm7,mm3
+ pxor mm6,[400+edi*8+esp]
+ xor cl,BYTE [edi*1+esp]
+ mov al,dl
+ movd ebx,mm7
+ movzx ecx,cl
+ psrlq mm7,8
+ movq mm3,mm6
+ mov edi,eax
+ psrlq mm6,8
+ pxor mm7,[272+ebp*8+esp]
+ and al,15
+ psllq mm3,56
+ pxor mm6,mm0
+ shr edi,4
+ pinsrw mm2,WORD [ecx*2+esi],2
+ pxor mm7,[16+eax*8+esp]
+ rol edx,8
+ pxor mm6,[144+eax*8+esp]
+ pxor mm7,mm3
+ pxor mm6,[400+ebp*8+esp]
+ xor bl,BYTE [ebp*1+esp]
+ mov al,dl
+ movd ecx,mm7
+ movzx ebx,bl
+ psrlq mm7,8
+ movq mm3,mm6
+ mov ebp,eax
+ psrlq mm6,8
+ pxor mm7,[272+edi*8+esp]
+ and al,15
+ psllq mm3,56
+ pxor mm6,mm2
+ shr ebp,4
+ pinsrw mm1,WORD [ebx*2+esi],2
+ pxor mm7,[16+eax*8+esp]
+ rol edx,8
+ pxor mm6,[144+eax*8+esp]
+ pxor mm7,mm3
+ pxor mm6,[400+edi*8+esp]
+ xor cl,BYTE [edi*1+esp]
+ mov al,dl
+ mov edx,DWORD [532+esp]
+ movd ebx,mm7
+ movzx ecx,cl
+ psrlq mm7,8
+ movq mm3,mm6
+ mov edi,eax
+ psrlq mm6,8
+ pxor mm7,[272+ebp*8+esp]
+ and al,15
+ psllq mm3,56
+ pxor mm6,mm1
+ shr edi,4
+ pinsrw mm0,WORD [ecx*2+esi],2
+ pxor mm7,[16+eax*8+esp]
+ rol edx,8
+ pxor mm6,[144+eax*8+esp]
+ pxor mm7,mm3
+ pxor mm6,[400+ebp*8+esp]
+ xor bl,BYTE [ebp*1+esp]
+ mov al,dl
+ movd ecx,mm7
+ movzx ebx,bl
+ psrlq mm7,8
+ movq mm3,mm6
+ mov ebp,eax
+ psrlq mm6,8
+ pxor mm7,[272+edi*8+esp]
+ and al,15
+ psllq mm3,56
+ pxor mm6,mm0
+ shr ebp,4
+ pinsrw mm2,WORD [ebx*2+esi],2
+ pxor mm7,[16+eax*8+esp]
+ rol edx,8
+ pxor mm6,[144+eax*8+esp]
+ pxor mm7,mm3
+ pxor mm6,[400+edi*8+esp]
+ xor cl,BYTE [edi*1+esp]
+ mov al,dl
+ movd ebx,mm7
+ movzx ecx,cl
+ psrlq mm7,8
+ movq mm3,mm6
+ mov edi,eax
+ psrlq mm6,8
+ pxor mm7,[272+ebp*8+esp]
+ and al,15
+ psllq mm3,56
+ pxor mm6,mm2
+ shr edi,4
+ pinsrw mm1,WORD [ecx*2+esi],2
+ pxor mm7,[16+eax*8+esp]
+ rol edx,8
+ pxor mm6,[144+eax*8+esp]
+ pxor mm7,mm3
+ pxor mm6,[400+ebp*8+esp]
+ xor bl,BYTE [ebp*1+esp]
+ mov al,dl
+ movd ecx,mm7
+ movzx ebx,bl
+ psrlq mm7,8
+ movq mm3,mm6
+ mov ebp,eax
+ psrlq mm6,8
+ pxor mm7,[272+edi*8+esp]
+ and al,15
+ psllq mm3,56
+ pxor mm6,mm1
+ shr ebp,4
+ pinsrw mm0,WORD [ebx*2+esi],2
+ pxor mm7,[16+eax*8+esp]
+ rol edx,8
+ pxor mm6,[144+eax*8+esp]
+ pxor mm7,mm3
+ pxor mm6,[400+edi*8+esp]
+ xor cl,BYTE [edi*1+esp]
+ mov al,dl
+ mov edx,DWORD [528+esp]
+ movd ebx,mm7
+ movzx ecx,cl
+ psrlq mm7,8
+ movq mm3,mm6
+ mov edi,eax
+ psrlq mm6,8
+ pxor mm7,[272+ebp*8+esp]
+ and al,15
+ psllq mm3,56
+ pxor mm6,mm0
+ shr edi,4
+ pinsrw mm2,WORD [ecx*2+esi],2
+ pxor mm7,[16+eax*8+esp]
+ rol edx,8
+ pxor mm6,[144+eax*8+esp]
+ pxor mm7,mm3
+ pxor mm6,[400+ebp*8+esp]
+ xor bl,BYTE [ebp*1+esp]
+ mov al,dl
+ movd ecx,mm7
+ movzx ebx,bl
+ psrlq mm7,8
+ movq mm3,mm6
+ mov ebp,eax
+ psrlq mm6,8
+ pxor mm7,[272+edi*8+esp]
+ and al,15
+ psllq mm3,56
+ pxor mm6,mm2
+ shr ebp,4
+ pinsrw mm1,WORD [ebx*2+esi],2
+ pxor mm7,[16+eax*8+esp]
+ rol edx,8
+ pxor mm6,[144+eax*8+esp]
+ pxor mm7,mm3
+ pxor mm6,[400+edi*8+esp]
+ xor cl,BYTE [edi*1+esp]
+ mov al,dl
+ movd ebx,mm7
+ movzx ecx,cl
+ psrlq mm7,8
+ movq mm3,mm6
+ mov edi,eax
+ psrlq mm6,8
+ pxor mm7,[272+ebp*8+esp]
+ and al,15
+ psllq mm3,56
+ pxor mm6,mm1
+ shr edi,4
+ pinsrw mm0,WORD [ecx*2+esi],2
+ pxor mm7,[16+eax*8+esp]
+ rol edx,8
+ pxor mm6,[144+eax*8+esp]
+ pxor mm7,mm3
+ pxor mm6,[400+ebp*8+esp]
+ xor bl,BYTE [ebp*1+esp]
+ mov al,dl
+ movd ecx,mm7
+ movzx ebx,bl
+ psrlq mm7,8
+ movq mm3,mm6
+ mov ebp,eax
+ psrlq mm6,8
+ pxor mm7,[272+edi*8+esp]
+ and al,15
+ psllq mm3,56
+ pxor mm6,mm0
+ shr ebp,4
+ pinsrw mm2,WORD [ebx*2+esi],2
+ pxor mm7,[16+eax*8+esp]
+ rol edx,8
+ pxor mm6,[144+eax*8+esp]
+ pxor mm7,mm3
+ pxor mm6,[400+edi*8+esp]
+ xor cl,BYTE [edi*1+esp]
+ mov al,dl
+ mov edx,DWORD [524+esp]
+ movd ebx,mm7
+ movzx ecx,cl
+ psrlq mm7,8
+ movq mm3,mm6
+ mov edi,eax
+ psrlq mm6,8
+ pxor mm7,[272+ebp*8+esp]
+ and al,15
+ psllq mm3,56
+ pxor mm6,mm2
+ shr edi,4
+ pinsrw mm1,WORD [ecx*2+esi],2
+ pxor mm7,[16+eax*8+esp]
+ pxor mm6,[144+eax*8+esp]
+ xor bl,BYTE [ebp*1+esp]
+ pxor mm7,mm3
+ pxor mm6,[400+ebp*8+esp]
+ movzx ebx,bl
+ pxor mm2,mm2
+ psllq mm1,4
+ movd ecx,mm7
+ psrlq mm7,4
+ movq mm3,mm6
+ psrlq mm6,4
+ shl ecx,4
+ pxor mm7,[16+edi*8+esp]
+ psllq mm3,60
+ movzx ecx,cl
+ pxor mm7,mm3
+ pxor mm6,[144+edi*8+esp]
+ pinsrw mm0,WORD [ebx*2+esi],2
+ pxor mm6,mm1
+ movd edx,mm7
+ pinsrw mm2,WORD [ecx*2+esi],3
+ psllq mm0,12
+ pxor mm6,mm0
+ psrlq mm7,32
+ pxor mm6,mm2
+ mov ecx,DWORD [548+esp]
+ movd ebx,mm7
+ movq mm3,mm6
+ psllw mm6,8
+ psrlw mm3,8
+ por mm6,mm3
+ bswap edx
+ pshufw mm6,mm6,27
+ bswap ebx
+ cmp ecx,DWORD [552+esp]
+ jne NEAR L$009outer
+ mov eax,DWORD [544+esp]
+ mov DWORD [12+eax],edx
+ mov DWORD [8+eax],ebx
+ movq [eax],mm6
+ mov esp,DWORD [556+esp]
+ emms
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _gcm_init_clmul
+align 16
+_gcm_init_clmul:
+L$_gcm_init_clmul_begin:
+ mov edx,DWORD [4+esp]
+ mov eax,DWORD [8+esp]
+ call L$010pic
+L$010pic:
+ pop ecx
+ lea ecx,[(L$bswap-L$010pic)+ecx]
+ movdqu xmm2,[eax]
+ pshufd xmm2,xmm2,78
+ pshufd xmm4,xmm2,255
+ movdqa xmm3,xmm2
+ psllq xmm2,1
+ pxor xmm5,xmm5
+ psrlq xmm3,63
+ pcmpgtd xmm5,xmm4
+ pslldq xmm3,8
+ por xmm2,xmm3
+ pand xmm5,[16+ecx]
+ pxor xmm2,xmm5
+ movdqa xmm0,xmm2
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pshufd xmm4,xmm2,78
+ pxor xmm3,xmm0
+ pxor xmm4,xmm2
+db 102,15,58,68,194,0
+db 102,15,58,68,202,17
+db 102,15,58,68,220,0
+ xorps xmm3,xmm0
+ xorps xmm3,xmm1
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3
+ pxor xmm0,xmm4
+ movdqa xmm4,xmm0
+ movdqa xmm3,xmm0
+ psllq xmm0,5
+ pxor xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm3,xmm0
+ pslldq xmm0,8
+ psrldq xmm3,8
+ pxor xmm0,xmm4
+ pxor xmm1,xmm3
+ movdqa xmm4,xmm0
+ psrlq xmm0,1
+ pxor xmm1,xmm4
+ pxor xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm1
+ pshufd xmm3,xmm2,78
+ pshufd xmm4,xmm0,78
+ pxor xmm3,xmm2
+ movdqu [edx],xmm2
+ pxor xmm4,xmm0
+ movdqu [16+edx],xmm0
+db 102,15,58,15,227,8
+ movdqu [32+edx],xmm4
+ ret
+global _gcm_gmult_clmul
+align 16
+_gcm_gmult_clmul:
+L$_gcm_gmult_clmul_begin:
+ mov eax,DWORD [4+esp]
+ mov edx,DWORD [8+esp]
+ call L$011pic
+L$011pic:
+ pop ecx
+ lea ecx,[(L$bswap-L$011pic)+ecx]
+ movdqu xmm0,[eax]
+ movdqa xmm5,[ecx]
+ movups xmm2,[edx]
+db 102,15,56,0,197
+ movups xmm4,[32+edx]
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pxor xmm3,xmm0
+db 102,15,58,68,194,0
+db 102,15,58,68,202,17
+db 102,15,58,68,220,0
+ xorps xmm3,xmm0
+ xorps xmm3,xmm1
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3
+ pxor xmm0,xmm4
+ movdqa xmm4,xmm0
+ movdqa xmm3,xmm0
+ psllq xmm0,5
+ pxor xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm3,xmm0
+ pslldq xmm0,8
+ psrldq xmm3,8
+ pxor xmm0,xmm4
+ pxor xmm1,xmm3
+ movdqa xmm4,xmm0
+ psrlq xmm0,1
+ pxor xmm1,xmm4
+ pxor xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm1
+db 102,15,56,0,197
+ movdqu [eax],xmm0
+ ret
+global _gcm_ghash_clmul
+align 16
+_gcm_ghash_clmul:
+L$_gcm_ghash_clmul_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov eax,DWORD [20+esp]
+ mov edx,DWORD [24+esp]
+ mov esi,DWORD [28+esp]
+ mov ebx,DWORD [32+esp]
+ call L$012pic
+L$012pic:
+ pop ecx
+ lea ecx,[(L$bswap-L$012pic)+ecx]
+ movdqu xmm0,[eax]
+ movdqa xmm5,[ecx]
+ movdqu xmm2,[edx]
+db 102,15,56,0,197
+ sub ebx,16
+ jz NEAR L$013odd_tail
+ movdqu xmm3,[esi]
+ movdqu xmm6,[16+esi]
+db 102,15,56,0,221
+db 102,15,56,0,245
+ movdqu xmm5,[32+edx]
+ pxor xmm0,xmm3
+ pshufd xmm3,xmm6,78
+ movdqa xmm7,xmm6
+ pxor xmm3,xmm6
+ lea esi,[32+esi]
+db 102,15,58,68,242,0
+db 102,15,58,68,250,17
+db 102,15,58,68,221,0
+ movups xmm2,[16+edx]
+ nop
+ sub ebx,32
+ jbe NEAR L$014even_tail
+ jmp NEAR L$015mod_loop
+align 32
+L$015mod_loop:
+ pshufd xmm4,xmm0,78
+ movdqa xmm1,xmm0
+ pxor xmm4,xmm0
+ nop
+db 102,15,58,68,194,0
+db 102,15,58,68,202,17
+db 102,15,58,68,229,16
+ movups xmm2,[edx]
+ xorps xmm0,xmm6
+ movdqa xmm5,[ecx]
+ xorps xmm1,xmm7
+ movdqu xmm7,[esi]
+ pxor xmm3,xmm0
+ movdqu xmm6,[16+esi]
+ pxor xmm3,xmm1
+db 102,15,56,0,253
+ pxor xmm4,xmm3
+ movdqa xmm3,xmm4
+ psrldq xmm4,8
+ pslldq xmm3,8
+ pxor xmm1,xmm4
+ pxor xmm0,xmm3
+db 102,15,56,0,245
+ pxor xmm1,xmm7
+ movdqa xmm7,xmm6
+ movdqa xmm4,xmm0
+ movdqa xmm3,xmm0
+ psllq xmm0,5
+ pxor xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+db 102,15,58,68,242,0
+ movups xmm5,[32+edx]
+ psllq xmm0,57
+ movdqa xmm3,xmm0
+ pslldq xmm0,8
+ psrldq xmm3,8
+ pxor xmm0,xmm4
+ pxor xmm1,xmm3
+ pshufd xmm3,xmm7,78
+ movdqa xmm4,xmm0
+ psrlq xmm0,1
+ pxor xmm3,xmm7
+ pxor xmm1,xmm4
+db 102,15,58,68,250,17
+ movups xmm2,[16+edx]
+ pxor xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm1
+db 102,15,58,68,221,0
+ lea esi,[32+esi]
+ sub ebx,32
+ ja NEAR L$015mod_loop
+L$014even_tail:
+ pshufd xmm4,xmm0,78
+ movdqa xmm1,xmm0
+ pxor xmm4,xmm0
+db 102,15,58,68,194,0
+db 102,15,58,68,202,17
+db 102,15,58,68,229,16
+ movdqa xmm5,[ecx]
+ xorps xmm0,xmm6
+ xorps xmm1,xmm7
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+ pxor xmm4,xmm3
+ movdqa xmm3,xmm4
+ psrldq xmm4,8
+ pslldq xmm3,8
+ pxor xmm1,xmm4
+ pxor xmm0,xmm3
+ movdqa xmm4,xmm0
+ movdqa xmm3,xmm0
+ psllq xmm0,5
+ pxor xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm3,xmm0
+ pslldq xmm0,8
+ psrldq xmm3,8
+ pxor xmm0,xmm4
+ pxor xmm1,xmm3
+ movdqa xmm4,xmm0
+ psrlq xmm0,1
+ pxor xmm1,xmm4
+ pxor xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm1
+ test ebx,ebx
+ jnz NEAR L$016done
+ movups xmm2,[edx]
+L$013odd_tail:
+ movdqu xmm3,[esi]
+db 102,15,56,0,221
+ pxor xmm0,xmm3
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pshufd xmm4,xmm2,78
+ pxor xmm3,xmm0
+ pxor xmm4,xmm2
+db 102,15,58,68,194,0
+db 102,15,58,68,202,17
+db 102,15,58,68,220,0
+ xorps xmm3,xmm0
+ xorps xmm3,xmm1
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3
+ pxor xmm0,xmm4
+ movdqa xmm4,xmm0
+ movdqa xmm3,xmm0
+ psllq xmm0,5
+ pxor xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm3,xmm0
+ pslldq xmm0,8
+ psrldq xmm3,8
+ pxor xmm0,xmm4
+ pxor xmm1,xmm3
+ movdqa xmm4,xmm0
+ psrlq xmm0,1
+ pxor xmm1,xmm4
+ pxor xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm1
+L$016done:
+db 102,15,56,0,197
+ movdqu [eax],xmm0
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+align 64
+L$bswap:
+db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+db 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
+align 64
+L$rem_8bit:
+dw 0,450,900,582,1800,1738,1164,1358
+dw 3600,4050,3476,3158,2328,2266,2716,2910
+dw 7200,7650,8100,7782,6952,6890,6316,6510
+dw 4656,5106,4532,4214,5432,5370,5820,6014
+dw 14400,14722,15300,14854,16200,16010,15564,15630
+dw 13904,14226,13780,13334,12632,12442,13020,13086
+dw 9312,9634,10212,9766,9064,8874,8428,8494
+dw 10864,11186,10740,10294,11640,11450,12028,12094
+dw 28800,28994,29444,29382,30600,30282,29708,30158
+dw 32400,32594,32020,31958,31128,30810,31260,31710
+dw 27808,28002,28452,28390,27560,27242,26668,27118
+dw 25264,25458,24884,24822,26040,25722,26172,26622
+dw 18624,18690,19268,19078,20424,19978,19532,19854
+dw 18128,18194,17748,17558,16856,16410,16988,17310
+dw 21728,21794,22372,22182,21480,21034,20588,20910
+dw 23280,23346,22900,22710,24056,23610,24188,24510
+dw 57600,57538,57988,58182,58888,59338,58764,58446
+dw 61200,61138,60564,60758,59416,59866,60316,59998
+dw 64800,64738,65188,65382,64040,64490,63916,63598
+dw 62256,62194,61620,61814,62520,62970,63420,63102
+dw 55616,55426,56004,56070,56904,57226,56780,56334
+dw 55120,54930,54484,54550,53336,53658,54236,53790
+dw 50528,50338,50916,50982,49768,50090,49644,49198
+dw 52080,51890,51444,51510,52344,52666,53244,52798
+dw 37248,36930,37380,37830,38536,38730,38156,38094
+dw 40848,40530,39956,40406,39064,39258,39708,39646
+dw 36256,35938,36388,36838,35496,35690,35116,35054
+dw 33712,33394,32820,33270,33976,34170,34620,34558
+dw 43456,43010,43588,43910,44744,44810,44364,44174
+dw 42960,42514,42068,42390,41176,41242,41820,41630
+dw 46560,46114,46692,47014,45800,45866,45420,45230
+dw 48112,47666,47220,47542,48376,48442,49020,48830
+align 64
+L$rem_4bit:
+dd 0,0,0,471859200,0,943718400,0,610271232
+dd 0,1887436800,0,1822425088,0,1220542464,0,1423966208
+dd 0,3774873600,0,4246732800,0,3644850176,0,3311403008
+dd 0,2441084928,0,2376073216,0,2847932416,0,3051356160
+db 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
+db 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
+db 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
+db 0
diff --git a/win-x86/crypto/rc4/rc4-586.asm b/win-x86/crypto/rc4/rc4-586.asm
new file mode 100644
index 0000000..08cd9f6
--- /dev/null
+++ b/win-x86/crypto/rc4/rc4-586.asm
@@ -0,0 +1,382 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+%ifdef __YASM_VERSION_ID__
+%if __YASM_VERSION_ID__ < 01010000h
+%error yasm version 1.1.0 or later needed.
+%endif
+; Yasm automatically includes .00 and complains about redefining it.
+; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
+%else
+$@feat.00 equ 1
+%endif
+section .text code align=64
+%else
+section .text code
+%endif
+;extern _OPENSSL_ia32cap_P
+global _asm_RC4
+align 16
+_asm_RC4:
+L$_asm_RC4_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov edi,DWORD [20+esp]
+ mov edx,DWORD [24+esp]
+ mov esi,DWORD [28+esp]
+ mov ebp,DWORD [32+esp]
+ xor eax,eax
+ xor ebx,ebx
+ cmp edx,0
+ je NEAR L$000abort
+ mov al,BYTE [edi]
+ mov bl,BYTE [4+edi]
+ add edi,8
+ lea ecx,[edx*1+esi]
+ sub ebp,esi
+ mov DWORD [24+esp],ecx
+ inc al
+ cmp DWORD [256+edi],-1
+ je NEAR L$001RC4_CHAR
+ mov ecx,DWORD [eax*4+edi]
+ and edx,-4
+ jz NEAR L$002loop1
+ mov DWORD [32+esp],ebp
+ test edx,-8
+ jz NEAR L$003go4loop4
+ lea ebp,[_OPENSSL_ia32cap_P]
+ bt DWORD [ebp],26
+ jnc NEAR L$003go4loop4
+ mov ebp,DWORD [32+esp]
+ and edx,-8
+ lea edx,[edx*1+esi-8]
+ mov DWORD [edi-4],edx
+ add bl,cl
+ mov edx,DWORD [ebx*4+edi]
+ mov DWORD [ebx*4+edi],ecx
+ mov DWORD [eax*4+edi],edx
+ inc eax
+ add edx,ecx
+ movzx eax,al
+ movzx edx,dl
+ movq mm0,[esi]
+ mov ecx,DWORD [eax*4+edi]
+ movd mm2,DWORD [edx*4+edi]
+ jmp NEAR L$004loop_mmx_enter
+align 16
+L$005loop_mmx:
+ add bl,cl
+ psllq mm1,56
+ mov edx,DWORD [ebx*4+edi]
+ mov DWORD [ebx*4+edi],ecx
+ mov DWORD [eax*4+edi],edx
+ inc eax
+ add edx,ecx
+ movzx eax,al
+ movzx edx,dl
+ pxor mm2,mm1
+ movq mm0,[esi]
+ movq [esi*1+ebp-8],mm2
+ mov ecx,DWORD [eax*4+edi]
+ movd mm2,DWORD [edx*4+edi]
+L$004loop_mmx_enter:
+ add bl,cl
+ mov edx,DWORD [ebx*4+edi]
+ mov DWORD [ebx*4+edi],ecx
+ mov DWORD [eax*4+edi],edx
+ inc eax
+ add edx,ecx
+ movzx eax,al
+ movzx edx,dl
+ pxor mm2,mm0
+ mov ecx,DWORD [eax*4+edi]
+ movd mm1,DWORD [edx*4+edi]
+ add bl,cl
+ psllq mm1,8
+ mov edx,DWORD [ebx*4+edi]
+ mov DWORD [ebx*4+edi],ecx
+ mov DWORD [eax*4+edi],edx
+ inc eax
+ add edx,ecx
+ movzx eax,al
+ movzx edx,dl
+ pxor mm2,mm1
+ mov ecx,DWORD [eax*4+edi]
+ movd mm1,DWORD [edx*4+edi]
+ add bl,cl
+ psllq mm1,16
+ mov edx,DWORD [ebx*4+edi]
+ mov DWORD [ebx*4+edi],ecx
+ mov DWORD [eax*4+edi],edx
+ inc eax
+ add edx,ecx
+ movzx eax,al
+ movzx edx,dl
+ pxor mm2,mm1
+ mov ecx,DWORD [eax*4+edi]
+ movd mm1,DWORD [edx*4+edi]
+ add bl,cl
+ psllq mm1,24
+ mov edx,DWORD [ebx*4+edi]
+ mov DWORD [ebx*4+edi],ecx
+ mov DWORD [eax*4+edi],edx
+ inc eax
+ add edx,ecx
+ movzx eax,al
+ movzx edx,dl
+ pxor mm2,mm1
+ mov ecx,DWORD [eax*4+edi]
+ movd mm1,DWORD [edx*4+edi]
+ add bl,cl
+ psllq mm1,32
+ mov edx,DWORD [ebx*4+edi]
+ mov DWORD [ebx*4+edi],ecx
+ mov DWORD [eax*4+edi],edx
+ inc eax
+ add edx,ecx
+ movzx eax,al
+ movzx edx,dl
+ pxor mm2,mm1
+ mov ecx,DWORD [eax*4+edi]
+ movd mm1,DWORD [edx*4+edi]
+ add bl,cl
+ psllq mm1,40
+ mov edx,DWORD [ebx*4+edi]
+ mov DWORD [ebx*4+edi],ecx
+ mov DWORD [eax*4+edi],edx
+ inc eax
+ add edx,ecx
+ movzx eax,al
+ movzx edx,dl
+ pxor mm2,mm1
+ mov ecx,DWORD [eax*4+edi]
+ movd mm1,DWORD [edx*4+edi]
+ add bl,cl
+ psllq mm1,48
+ mov edx,DWORD [ebx*4+edi]
+ mov DWORD [ebx*4+edi],ecx
+ mov DWORD [eax*4+edi],edx
+ inc eax
+ add edx,ecx
+ movzx eax,al
+ movzx edx,dl
+ pxor mm2,mm1
+ mov ecx,DWORD [eax*4+edi]
+ movd mm1,DWORD [edx*4+edi]
+ mov edx,ebx
+ xor ebx,ebx
+ mov bl,dl
+ cmp esi,DWORD [edi-4]
+ lea esi,[8+esi]
+ jb NEAR L$005loop_mmx
+ psllq mm1,56
+ pxor mm2,mm1
+ movq [esi*1+ebp-8],mm2
+ emms
+ cmp esi,DWORD [24+esp]
+ je NEAR L$006done
+ jmp NEAR L$002loop1
+align 16
+L$003go4loop4:
+ lea edx,[edx*1+esi-4]
+ mov DWORD [28+esp],edx
+L$007loop4:
+ add bl,cl
+ mov edx,DWORD [ebx*4+edi]
+ mov DWORD [ebx*4+edi],ecx
+ mov DWORD [eax*4+edi],edx
+ add edx,ecx
+ inc al
+ and edx,255
+ mov ecx,DWORD [eax*4+edi]
+ mov ebp,DWORD [edx*4+edi]
+ add bl,cl
+ mov edx,DWORD [ebx*4+edi]
+ mov DWORD [ebx*4+edi],ecx
+ mov DWORD [eax*4+edi],edx
+ add edx,ecx
+ inc al
+ and edx,255
+ ror ebp,8
+ mov ecx,DWORD [eax*4+edi]
+ or ebp,DWORD [edx*4+edi]
+ add bl,cl
+ mov edx,DWORD [ebx*4+edi]
+ mov DWORD [ebx*4+edi],ecx
+ mov DWORD [eax*4+edi],edx
+ add edx,ecx
+ inc al
+ and edx,255
+ ror ebp,8
+ mov ecx,DWORD [eax*4+edi]
+ or ebp,DWORD [edx*4+edi]
+ add bl,cl
+ mov edx,DWORD [ebx*4+edi]
+ mov DWORD [ebx*4+edi],ecx
+ mov DWORD [eax*4+edi],edx
+ add edx,ecx
+ inc al
+ and edx,255
+ ror ebp,8
+ mov ecx,DWORD [32+esp]
+ or ebp,DWORD [edx*4+edi]
+ ror ebp,8
+ xor ebp,DWORD [esi]
+ cmp esi,DWORD [28+esp]
+ mov DWORD [esi*1+ecx],ebp
+ lea esi,[4+esi]
+ mov ecx,DWORD [eax*4+edi]
+ jb NEAR L$007loop4
+ cmp esi,DWORD [24+esp]
+ je NEAR L$006done
+ mov ebp,DWORD [32+esp]
+align 16
+L$002loop1:
+ add bl,cl
+ mov edx,DWORD [ebx*4+edi]
+ mov DWORD [ebx*4+edi],ecx
+ mov DWORD [eax*4+edi],edx
+ add edx,ecx
+ inc al
+ and edx,255
+ mov edx,DWORD [edx*4+edi]
+ xor dl,BYTE [esi]
+ lea esi,[1+esi]
+ mov ecx,DWORD [eax*4+edi]
+ cmp esi,DWORD [24+esp]
+ mov BYTE [esi*1+ebp-1],dl
+ jb NEAR L$002loop1
+ jmp NEAR L$006done
+align 16
+L$001RC4_CHAR:
+ movzx ecx,BYTE [eax*1+edi]
+L$008cloop1:
+ add bl,cl
+ movzx edx,BYTE [ebx*1+edi]
+ mov BYTE [ebx*1+edi],cl
+ mov BYTE [eax*1+edi],dl
+ add dl,cl
+ movzx edx,BYTE [edx*1+edi]
+ add al,1
+ xor dl,BYTE [esi]
+ lea esi,[1+esi]
+ movzx ecx,BYTE [eax*1+edi]
+ cmp esi,DWORD [24+esp]
+ mov BYTE [esi*1+ebp-1],dl
+ jb NEAR L$008cloop1
+L$006done:
+ dec al
+ mov DWORD [edi-4],ebx
+ mov BYTE [edi-8],al
+L$000abort:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _asm_RC4_set_key
+align 16
+_asm_RC4_set_key:
+L$_asm_RC4_set_key_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov edi,DWORD [20+esp]
+ mov ebp,DWORD [24+esp]
+ mov esi,DWORD [28+esp]
+ lea edx,[_OPENSSL_ia32cap_P]
+ lea edi,[8+edi]
+ lea esi,[ebp*1+esi]
+ neg ebp
+ xor eax,eax
+ mov DWORD [edi-4],ebp
+ bt DWORD [edx],20
+ jc NEAR L$009c1stloop
+align 16
+L$010w1stloop:
+ mov DWORD [eax*4+edi],eax
+ add al,1
+ jnc NEAR L$010w1stloop
+ xor ecx,ecx
+ xor edx,edx
+align 16
+L$011w2ndloop:
+ mov eax,DWORD [ecx*4+edi]
+ add dl,BYTE [ebp*1+esi]
+ add dl,al
+ add ebp,1
+ mov ebx,DWORD [edx*4+edi]
+ jnz NEAR L$012wnowrap
+ mov ebp,DWORD [edi-4]
+L$012wnowrap:
+ mov DWORD [edx*4+edi],eax
+ mov DWORD [ecx*4+edi],ebx
+ add cl,1
+ jnc NEAR L$011w2ndloop
+ jmp NEAR L$013exit
+align 16
+L$009c1stloop:
+ mov BYTE [eax*1+edi],al
+ add al,1
+ jnc NEAR L$009c1stloop
+ xor ecx,ecx
+ xor edx,edx
+ xor ebx,ebx
+align 16
+L$014c2ndloop:
+ mov al,BYTE [ecx*1+edi]
+ add dl,BYTE [ebp*1+esi]
+ add dl,al
+ add ebp,1
+ mov bl,BYTE [edx*1+edi]
+ jnz NEAR L$015cnowrap
+ mov ebp,DWORD [edi-4]
+L$015cnowrap:
+ mov BYTE [edx*1+edi],al
+ mov BYTE [ecx*1+edi],bl
+ add cl,1
+ jnc NEAR L$014c2ndloop
+ mov DWORD [256+edi],-1
+L$013exit:
+ xor eax,eax
+ mov DWORD [edi-8],eax
+ mov DWORD [edi-4],eax
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _RC4_options
+align 16
+_RC4_options:
+L$_RC4_options_begin:
+ call L$016pic_point
+L$016pic_point:
+ pop eax
+ lea eax,[(L$017opts-L$016pic_point)+eax]
+ lea edx,[_OPENSSL_ia32cap_P]
+ mov edx,DWORD [edx]
+ bt edx,20
+ jc NEAR L$0181xchar
+ bt edx,26
+ jnc NEAR L$019ret
+ add eax,25
+ ret
+L$0181xchar:
+ add eax,12
+L$019ret:
+ ret
+align 64
+L$017opts:
+db 114,99,52,40,52,120,44,105,110,116,41,0
+db 114,99,52,40,49,120,44,99,104,97,114,41,0
+db 114,99,52,40,56,120,44,109,109,120,41,0
+db 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89
+db 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
+db 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+align 64
+segment .bss
+common _OPENSSL_ia32cap_P 16
diff --git a/win-x86/crypto/sha/sha1-586.asm b/win-x86/crypto/sha/sha1-586.asm
new file mode 100644
index 0000000..e24449d
--- /dev/null
+++ b/win-x86/crypto/sha/sha1-586.asm
@@ -0,0 +1,2805 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+%ifdef __YASM_VERSION_ID__
+%if __YASM_VERSION_ID__ < 01010000h
+%error yasm version 1.1.0 or later needed.
+%endif
+; Yasm automatically includes .00 and complains about redefining it.
+; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
+%else
+$@feat.00 equ 1
+%endif
+section .text code align=64
+%else
+section .text code
+%endif
+;extern _OPENSSL_ia32cap_P
+global _sha1_block_data_order
+align 16
+_sha1_block_data_order:
+L$_sha1_block_data_order_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ call L$000pic_point
+L$000pic_point:
+ pop ebp
+ lea esi,[_OPENSSL_ia32cap_P]
+ lea ebp,[(L$K_XX_XX-L$000pic_point)+ebp]
+ mov eax,DWORD [esi]
+ mov edx,DWORD [4+esi]
+ test edx,512
+ jz NEAR L$001x86
+ mov ecx,DWORD [8+esi]
+ test eax,16777216
+ jz NEAR L$001x86
+ test ecx,536870912
+ jnz NEAR L$shaext_shortcut
+ jmp NEAR L$ssse3_shortcut
+align 16
+L$001x86:
+ mov ebp,DWORD [20+esp]
+ mov esi,DWORD [24+esp]
+ mov eax,DWORD [28+esp]
+ sub esp,76
+ shl eax,6
+ add eax,esi
+ mov DWORD [104+esp],eax
+ mov edi,DWORD [16+ebp]
+ jmp NEAR L$002loop
+align 16
+L$002loop:
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [12+esi]
+ bswap eax
+ bswap ebx
+ bswap ecx
+ bswap edx
+ mov DWORD [esp],eax
+ mov DWORD [4+esp],ebx
+ mov DWORD [8+esp],ecx
+ mov DWORD [12+esp],edx
+ mov eax,DWORD [16+esi]
+ mov ebx,DWORD [20+esi]
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [28+esi]
+ bswap eax
+ bswap ebx
+ bswap ecx
+ bswap edx
+ mov DWORD [16+esp],eax
+ mov DWORD [20+esp],ebx
+ mov DWORD [24+esp],ecx
+ mov DWORD [28+esp],edx
+ mov eax,DWORD [32+esi]
+ mov ebx,DWORD [36+esi]
+ mov ecx,DWORD [40+esi]
+ mov edx,DWORD [44+esi]
+ bswap eax
+ bswap ebx
+ bswap ecx
+ bswap edx
+ mov DWORD [32+esp],eax
+ mov DWORD [36+esp],ebx
+ mov DWORD [40+esp],ecx
+ mov DWORD [44+esp],edx
+ mov eax,DWORD [48+esi]
+ mov ebx,DWORD [52+esi]
+ mov ecx,DWORD [56+esi]
+ mov edx,DWORD [60+esi]
+ bswap eax
+ bswap ebx
+ bswap ecx
+ bswap edx
+ mov DWORD [48+esp],eax
+ mov DWORD [52+esp],ebx
+ mov DWORD [56+esp],ecx
+ mov DWORD [60+esp],edx
+ mov DWORD [100+esp],esi
+ mov eax,DWORD [ebp]
+ mov ebx,DWORD [4+ebp]
+ mov ecx,DWORD [8+ebp]
+ mov edx,DWORD [12+ebp]
+ ; 00_15 0
+ mov esi,ecx
+ mov ebp,eax
+ rol ebp,5
+ xor esi,edx
+ add ebp,edi
+ mov edi,DWORD [esp]
+ and esi,ebx
+ ror ebx,2
+ xor esi,edx
+ lea ebp,[1518500249+edi*1+ebp]
+ add ebp,esi
+ ; 00_15 1
+ mov edi,ebx
+ mov esi,ebp
+ rol ebp,5
+ xor edi,ecx
+ add ebp,edx
+ mov edx,DWORD [4+esp]
+ and edi,eax
+ ror eax,2
+ xor edi,ecx
+ lea ebp,[1518500249+edx*1+ebp]
+ add ebp,edi
+ ; 00_15 2
+ mov edx,eax
+ mov edi,ebp
+ rol ebp,5
+ xor edx,ebx
+ add ebp,ecx
+ mov ecx,DWORD [8+esp]
+ and edx,esi
+ ror esi,2
+ xor edx,ebx
+ lea ebp,[1518500249+ecx*1+ebp]
+ add ebp,edx
+ ; 00_15 3
+ mov ecx,esi
+ mov edx,ebp
+ rol ebp,5
+ xor ecx,eax
+ add ebp,ebx
+ mov ebx,DWORD [12+esp]
+ and ecx,edi
+ ror edi,2
+ xor ecx,eax
+ lea ebp,[1518500249+ebx*1+ebp]
+ add ebp,ecx
+ ; 00_15 4
+ mov ebx,edi
+ mov ecx,ebp
+ rol ebp,5
+ xor ebx,esi
+ add ebp,eax
+ mov eax,DWORD [16+esp]
+ and ebx,edx
+ ror edx,2
+ xor ebx,esi
+ lea ebp,[1518500249+eax*1+ebp]
+ add ebp,ebx
+ ; 00_15 5
+ mov eax,edx
+ mov ebx,ebp
+ rol ebp,5
+ xor eax,edi
+ add ebp,esi
+ mov esi,DWORD [20+esp]
+ and eax,ecx
+ ror ecx,2
+ xor eax,edi
+ lea ebp,[1518500249+esi*1+ebp]
+ add ebp,eax
+ ; 00_15 6
+ mov esi,ecx
+ mov eax,ebp
+ rol ebp,5
+ xor esi,edx
+ add ebp,edi
+ mov edi,DWORD [24+esp]
+ and esi,ebx
+ ror ebx,2
+ xor esi,edx
+ lea ebp,[1518500249+edi*1+ebp]
+ add ebp,esi
+ ; 00_15 7
+ mov edi,ebx
+ mov esi,ebp
+ rol ebp,5
+ xor edi,ecx
+ add ebp,edx
+ mov edx,DWORD [28+esp]
+ and edi,eax
+ ror eax,2
+ xor edi,ecx
+ lea ebp,[1518500249+edx*1+ebp]
+ add ebp,edi
+ ; 00_15 8
+ mov edx,eax
+ mov edi,ebp
+ rol ebp,5
+ xor edx,ebx
+ add ebp,ecx
+ mov ecx,DWORD [32+esp]
+ and edx,esi
+ ror esi,2
+ xor edx,ebx
+ lea ebp,[1518500249+ecx*1+ebp]
+ add ebp,edx
+ ; 00_15 9
+ mov ecx,esi
+ mov edx,ebp
+ rol ebp,5
+ xor ecx,eax
+ add ebp,ebx
+ mov ebx,DWORD [36+esp]
+ and ecx,edi
+ ror edi,2
+ xor ecx,eax
+ lea ebp,[1518500249+ebx*1+ebp]
+ add ebp,ecx
+ ; 00_15 10
+ mov ebx,edi
+ mov ecx,ebp
+ rol ebp,5
+ xor ebx,esi
+ add ebp,eax
+ mov eax,DWORD [40+esp]
+ and ebx,edx
+ ror edx,2
+ xor ebx,esi
+ lea ebp,[1518500249+eax*1+ebp]
+ add ebp,ebx
+ ; 00_15 11
+ mov eax,edx
+ mov ebx,ebp
+ rol ebp,5
+ xor eax,edi
+ add ebp,esi
+ mov esi,DWORD [44+esp]
+ and eax,ecx
+ ror ecx,2
+ xor eax,edi
+ lea ebp,[1518500249+esi*1+ebp]
+ add ebp,eax
+ ; 00_15 12
+ mov esi,ecx
+ mov eax,ebp
+ rol ebp,5
+ xor esi,edx
+ add ebp,edi
+ mov edi,DWORD [48+esp]
+ and esi,ebx
+ ror ebx,2
+ xor esi,edx
+ lea ebp,[1518500249+edi*1+ebp]
+ add ebp,esi
+ ; 00_15 13
+ mov edi,ebx
+ mov esi,ebp
+ rol ebp,5
+ xor edi,ecx
+ add ebp,edx
+ mov edx,DWORD [52+esp]
+ and edi,eax
+ ror eax,2
+ xor edi,ecx
+ lea ebp,[1518500249+edx*1+ebp]
+ add ebp,edi
+ ; 00_15 14
+ mov edx,eax
+ mov edi,ebp
+ rol ebp,5
+ xor edx,ebx
+ add ebp,ecx
+ mov ecx,DWORD [56+esp]
+ and edx,esi
+ ror esi,2
+ xor edx,ebx
+ lea ebp,[1518500249+ecx*1+ebp]
+ add ebp,edx
+ ; 00_15 15
+ mov ecx,esi
+ mov edx,ebp
+ rol ebp,5
+ xor ecx,eax
+ add ebp,ebx
+ mov ebx,DWORD [60+esp]
+ and ecx,edi
+ ror edi,2
+ xor ecx,eax
+ lea ebp,[1518500249+ebx*1+ebp]
+ mov ebx,DWORD [esp]
+ add ecx,ebp
+ ; 16_19 16
+ mov ebp,edi
+ xor ebx,DWORD [8+esp]
+ xor ebp,esi
+ xor ebx,DWORD [32+esp]
+ and ebp,edx
+ xor ebx,DWORD [52+esp]
+ rol ebx,1
+ xor ebp,esi
+ add eax,ebp
+ mov ebp,ecx
+ ror edx,2
+ mov DWORD [esp],ebx
+ rol ebp,5
+ lea ebx,[1518500249+eax*1+ebx]
+ mov eax,DWORD [4+esp]
+ add ebx,ebp
+ ; 16_19 17
+ mov ebp,edx
+ xor eax,DWORD [12+esp]
+ xor ebp,edi
+ xor eax,DWORD [36+esp]
+ and ebp,ecx
+ xor eax,DWORD [56+esp]
+ rol eax,1
+ xor ebp,edi
+ add esi,ebp
+ mov ebp,ebx
+ ror ecx,2
+ mov DWORD [4+esp],eax
+ rol ebp,5
+ lea eax,[1518500249+esi*1+eax]
+ mov esi,DWORD [8+esp]
+ add eax,ebp
+ ; 16_19 18
+ mov ebp,ecx
+ xor esi,DWORD [16+esp]
+ xor ebp,edx
+ xor esi,DWORD [40+esp]
+ and ebp,ebx
+ xor esi,DWORD [60+esp]
+ rol esi,1
+ xor ebp,edx
+ add edi,ebp
+ mov ebp,eax
+ ror ebx,2
+ mov DWORD [8+esp],esi
+ rol ebp,5
+ lea esi,[1518500249+edi*1+esi]
+ mov edi,DWORD [12+esp]
+ add esi,ebp
+ ; 16_19 19
+ mov ebp,ebx
+ xor edi,DWORD [20+esp]
+ xor ebp,ecx
+ xor edi,DWORD [44+esp]
+ and ebp,eax
+ xor edi,DWORD [esp]
+ rol edi,1
+ xor ebp,ecx
+ add edx,ebp
+ mov ebp,esi
+ ror eax,2
+ mov DWORD [12+esp],edi
+ rol ebp,5
+ lea edi,[1518500249+edx*1+edi]
+ mov edx,DWORD [16+esp]
+ add edi,ebp
+ ; 20_39 20
+ mov ebp,esi
+ xor edx,DWORD [24+esp]
+ xor ebp,eax
+ xor edx,DWORD [48+esp]
+ xor ebp,ebx
+ xor edx,DWORD [4+esp]
+ rol edx,1
+ add ecx,ebp
+ ror esi,2
+ mov ebp,edi
+ rol ebp,5
+ mov DWORD [16+esp],edx
+ lea edx,[1859775393+ecx*1+edx]
+ mov ecx,DWORD [20+esp]
+ add edx,ebp
+ ; 20_39 21
+ mov ebp,edi
+ xor ecx,DWORD [28+esp]
+ xor ebp,esi
+ xor ecx,DWORD [52+esp]
+ xor ebp,eax
+ xor ecx,DWORD [8+esp]
+ rol ecx,1
+ add ebx,ebp
+ ror edi,2
+ mov ebp,edx
+ rol ebp,5
+ mov DWORD [20+esp],ecx
+ lea ecx,[1859775393+ebx*1+ecx]
+ mov ebx,DWORD [24+esp]
+ add ecx,ebp
+ ; 20_39 22
+ mov ebp,edx
+ xor ebx,DWORD [32+esp]
+ xor ebp,edi
+ xor ebx,DWORD [56+esp]
+ xor ebp,esi
+ xor ebx,DWORD [12+esp]
+ rol ebx,1
+ add eax,ebp
+ ror edx,2
+ mov ebp,ecx
+ rol ebp,5
+ mov DWORD [24+esp],ebx
+ lea ebx,[1859775393+eax*1+ebx]
+ mov eax,DWORD [28+esp]
+ add ebx,ebp
+ ; 20_39 23
+ mov ebp,ecx
+ xor eax,DWORD [36+esp]
+ xor ebp,edx
+ xor eax,DWORD [60+esp]
+ xor ebp,edi
+ xor eax,DWORD [16+esp]
+ rol eax,1
+ add esi,ebp
+ ror ecx,2
+ mov ebp,ebx
+ rol ebp,5
+ mov DWORD [28+esp],eax
+ lea eax,[1859775393+esi*1+eax]
+ mov esi,DWORD [32+esp]
+ add eax,ebp
+ ; 20_39 24
+ mov ebp,ebx
+ xor esi,DWORD [40+esp]
+ xor ebp,ecx
+ xor esi,DWORD [esp]
+ xor ebp,edx
+ xor esi,DWORD [20+esp]
+ rol esi,1
+ add edi,ebp
+ ror ebx,2
+ mov ebp,eax
+ rol ebp,5
+ mov DWORD [32+esp],esi
+ lea esi,[1859775393+edi*1+esi]
+ mov edi,DWORD [36+esp]
+ add esi,ebp
+ ; 20_39 25
+ mov ebp,eax
+ xor edi,DWORD [44+esp]
+ xor ebp,ebx
+ xor edi,DWORD [4+esp]
+ xor ebp,ecx
+ xor edi,DWORD [24+esp]
+ rol edi,1
+ add edx,ebp
+ ror eax,2
+ mov ebp,esi
+ rol ebp,5
+ mov DWORD [36+esp],edi
+ lea edi,[1859775393+edx*1+edi]
+ mov edx,DWORD [40+esp]
+ add edi,ebp
+ ; 20_39 26
+ mov ebp,esi
+ xor edx,DWORD [48+esp]
+ xor ebp,eax
+ xor edx,DWORD [8+esp]
+ xor ebp,ebx
+ xor edx,DWORD [28+esp]
+ rol edx,1
+ add ecx,ebp
+ ror esi,2
+ mov ebp,edi
+ rol ebp,5
+ mov DWORD [40+esp],edx
+ lea edx,[1859775393+ecx*1+edx]
+ mov ecx,DWORD [44+esp]
+ add edx,ebp
+ ; 20_39 27
+ mov ebp,edi
+ xor ecx,DWORD [52+esp]
+ xor ebp,esi
+ xor ecx,DWORD [12+esp]
+ xor ebp,eax
+ xor ecx,DWORD [32+esp]
+ rol ecx,1
+ add ebx,ebp
+ ror edi,2
+ mov ebp,edx
+ rol ebp,5
+ mov DWORD [44+esp],ecx
+ lea ecx,[1859775393+ebx*1+ecx]
+ mov ebx,DWORD [48+esp]
+ add ecx,ebp
+ ; 20_39 28
+ mov ebp,edx
+ xor ebx,DWORD [56+esp]
+ xor ebp,edi
+ xor ebx,DWORD [16+esp]
+ xor ebp,esi
+ xor ebx,DWORD [36+esp]
+ rol ebx,1
+ add eax,ebp
+ ror edx,2
+ mov ebp,ecx
+ rol ebp,5
+ mov DWORD [48+esp],ebx
+ lea ebx,[1859775393+eax*1+ebx]
+ mov eax,DWORD [52+esp]
+ add ebx,ebp
+ ; 20_39 29
+ mov ebp,ecx
+ xor eax,DWORD [60+esp]
+ xor ebp,edx
+ xor eax,DWORD [20+esp]
+ xor ebp,edi
+ xor eax,DWORD [40+esp]
+ rol eax,1
+ add esi,ebp
+ ror ecx,2
+ mov ebp,ebx
+ rol ebp,5
+ mov DWORD [52+esp],eax
+ lea eax,[1859775393+esi*1+eax]
+ mov esi,DWORD [56+esp]
+ add eax,ebp
+ ; 20_39 30
+ mov ebp,ebx
+ xor esi,DWORD [esp]
+ xor ebp,ecx
+ xor esi,DWORD [24+esp]
+ xor ebp,edx
+ xor esi,DWORD [44+esp]
+ rol esi,1
+ add edi,ebp
+ ror ebx,2
+ mov ebp,eax
+ rol ebp,5
+ mov DWORD [56+esp],esi
+ lea esi,[1859775393+edi*1+esi]
+ mov edi,DWORD [60+esp]
+ add esi,ebp
+ ; 20_39 31
+ mov ebp,eax
+ xor edi,DWORD [4+esp]
+ xor ebp,ebx
+ xor edi,DWORD [28+esp]
+ xor ebp,ecx
+ xor edi,DWORD [48+esp]
+ rol edi,1
+ add edx,ebp
+ ror eax,2
+ mov ebp,esi
+ rol ebp,5
+ mov DWORD [60+esp],edi
+ lea edi,[1859775393+edx*1+edi]
+ mov edx,DWORD [esp]
+ add edi,ebp
+ ; 20_39 32
+ mov ebp,esi
+ xor edx,DWORD [8+esp]
+ xor ebp,eax
+ xor edx,DWORD [32+esp]
+ xor ebp,ebx
+ xor edx,DWORD [52+esp]
+ rol edx,1
+ add ecx,ebp
+ ror esi,2
+ mov ebp,edi
+ rol ebp,5
+ mov DWORD [esp],edx
+ lea edx,[1859775393+ecx*1+edx]
+ mov ecx,DWORD [4+esp]
+ add edx,ebp
+ ; 20_39 33
+ mov ebp,edi
+ xor ecx,DWORD [12+esp]
+ xor ebp,esi
+ xor ecx,DWORD [36+esp]
+ xor ebp,eax
+ xor ecx,DWORD [56+esp]
+ rol ecx,1
+ add ebx,ebp
+ ror edi,2
+ mov ebp,edx
+ rol ebp,5
+ mov DWORD [4+esp],ecx
+ lea ecx,[1859775393+ebx*1+ecx]
+ mov ebx,DWORD [8+esp]
+ add ecx,ebp
+ ; 20_39 34
+ mov ebp,edx
+ xor ebx,DWORD [16+esp]
+ xor ebp,edi
+ xor ebx,DWORD [40+esp]
+ xor ebp,esi
+ xor ebx,DWORD [60+esp]
+ rol ebx,1
+ add eax,ebp
+ ror edx,2
+ mov ebp,ecx
+ rol ebp,5
+ mov DWORD [8+esp],ebx
+ lea ebx,[1859775393+eax*1+ebx]
+ mov eax,DWORD [12+esp]
+ add ebx,ebp
+ ; 20_39 35
+ mov ebp,ecx
+ xor eax,DWORD [20+esp]
+ xor ebp,edx
+ xor eax,DWORD [44+esp]
+ xor ebp,edi
+ xor eax,DWORD [esp]
+ rol eax,1
+ add esi,ebp
+ ror ecx,2
+ mov ebp,ebx
+ rol ebp,5
+ mov DWORD [12+esp],eax
+ lea eax,[1859775393+esi*1+eax]
+ mov esi,DWORD [16+esp]
+ add eax,ebp
+ ; 20_39 36
+ mov ebp,ebx
+ xor esi,DWORD [24+esp]
+ xor ebp,ecx
+ xor esi,DWORD [48+esp]
+ xor ebp,edx
+ xor esi,DWORD [4+esp]
+ rol esi,1
+ add edi,ebp
+ ror ebx,2
+ mov ebp,eax
+ rol ebp,5
+ mov DWORD [16+esp],esi
+ lea esi,[1859775393+edi*1+esi]
+ mov edi,DWORD [20+esp]
+ add esi,ebp
+ ; 20_39 37
+ mov ebp,eax
+ xor edi,DWORD [28+esp]
+ xor ebp,ebx
+ xor edi,DWORD [52+esp]
+ xor ebp,ecx
+ xor edi,DWORD [8+esp]
+ rol edi,1
+ add edx,ebp
+ ror eax,2
+ mov ebp,esi
+ rol ebp,5
+ mov DWORD [20+esp],edi
+ lea edi,[1859775393+edx*1+edi]
+ mov edx,DWORD [24+esp]
+ add edi,ebp
+ ; 20_39 38
+ mov ebp,esi
+ xor edx,DWORD [32+esp]
+ xor ebp,eax
+ xor edx,DWORD [56+esp]
+ xor ebp,ebx
+ xor edx,DWORD [12+esp]
+ rol edx,1
+ add ecx,ebp
+ ror esi,2
+ mov ebp,edi
+ rol ebp,5
+ mov DWORD [24+esp],edx
+ lea edx,[1859775393+ecx*1+edx]
+ mov ecx,DWORD [28+esp]
+ add edx,ebp
+ ; 20_39 39
+ mov ebp,edi
+ xor ecx,DWORD [36+esp]
+ xor ebp,esi
+ xor ecx,DWORD [60+esp]
+ xor ebp,eax
+ xor ecx,DWORD [16+esp]
+ rol ecx,1
+ add ebx,ebp
+ ror edi,2
+ mov ebp,edx
+ rol ebp,5
+ mov DWORD [28+esp],ecx
+ lea ecx,[1859775393+ebx*1+ecx]
+ mov ebx,DWORD [32+esp]
+ add ecx,ebp
+ ; 40_59 40
+ mov ebp,edi
+ xor ebx,DWORD [40+esp]
+ xor ebp,esi
+ xor ebx,DWORD [esp]
+ and ebp,edx
+ xor ebx,DWORD [20+esp]
+ rol ebx,1
+ add ebp,eax
+ ror edx,2
+ mov eax,ecx
+ rol eax,5
+ mov DWORD [32+esp],ebx
+ lea ebx,[2400959708+ebp*1+ebx]
+ mov ebp,edi
+ add ebx,eax
+ and ebp,esi
+ mov eax,DWORD [36+esp]
+ add ebx,ebp
+ ; 40_59 41
+ mov ebp,edx
+ xor eax,DWORD [44+esp]
+ xor ebp,edi
+ xor eax,DWORD [4+esp]
+ and ebp,ecx
+ xor eax,DWORD [24+esp]
+ rol eax,1
+ add ebp,esi
+ ror ecx,2
+ mov esi,ebx
+ rol esi,5
+ mov DWORD [36+esp],eax
+ lea eax,[2400959708+ebp*1+eax]
+ mov ebp,edx
+ add eax,esi
+ and ebp,edi
+ mov esi,DWORD [40+esp]
+ add eax,ebp
+ ; 40_59 42
+ mov ebp,ecx
+ xor esi,DWORD [48+esp]
+ xor ebp,edx
+ xor esi,DWORD [8+esp]
+ and ebp,ebx
+ xor esi,DWORD [28+esp]
+ rol esi,1
+ add ebp,edi
+ ror ebx,2
+ mov edi,eax
+ rol edi,5
+ mov DWORD [40+esp],esi
+ lea esi,[2400959708+ebp*1+esi]
+ mov ebp,ecx
+ add esi,edi
+ and ebp,edx
+ mov edi,DWORD [44+esp]
+ add esi,ebp
+ ; 40_59 43
+ mov ebp,ebx
+ xor edi,DWORD [52+esp]
+ xor ebp,ecx
+ xor edi,DWORD [12+esp]
+ and ebp,eax
+ xor edi,DWORD [32+esp]
+ rol edi,1
+ add ebp,edx
+ ror eax,2
+ mov edx,esi
+ rol edx,5
+ mov DWORD [44+esp],edi
+ lea edi,[2400959708+ebp*1+edi]
+ mov ebp,ebx
+ add edi,edx
+ and ebp,ecx
+ mov edx,DWORD [48+esp]
+ add edi,ebp
+ ; 40_59 44
+ mov ebp,eax
+ xor edx,DWORD [56+esp]
+ xor ebp,ebx
+ xor edx,DWORD [16+esp]
+ and ebp,esi
+ xor edx,DWORD [36+esp]
+ rol edx,1
+ add ebp,ecx
+ ror esi,2
+ mov ecx,edi
+ rol ecx,5
+ mov DWORD [48+esp],edx
+ lea edx,[2400959708+ebp*1+edx]
+ mov ebp,eax
+ add edx,ecx
+ and ebp,ebx
+ mov ecx,DWORD [52+esp]
+ add edx,ebp
+ ; 40_59 45
+ mov ebp,esi
+ xor ecx,DWORD [60+esp]
+ xor ebp,eax
+ xor ecx,DWORD [20+esp]
+ and ebp,edi
+ xor ecx,DWORD [40+esp]
+ rol ecx,1
+ add ebp,ebx
+ ror edi,2
+ mov ebx,edx
+ rol ebx,5
+ mov DWORD [52+esp],ecx
+ lea ecx,[2400959708+ebp*1+ecx]
+ mov ebp,esi
+ add ecx,ebx
+ and ebp,eax
+ mov ebx,DWORD [56+esp]
+ add ecx,ebp
+ ; 40_59 46
+ mov ebp,edi
+ xor ebx,DWORD [esp]
+ xor ebp,esi
+ xor ebx,DWORD [24+esp]
+ and ebp,edx
+ xor ebx,DWORD [44+esp]
+ rol ebx,1
+ add ebp,eax
+ ror edx,2
+ mov eax,ecx
+ rol eax,5
+ mov DWORD [56+esp],ebx
+ lea ebx,[2400959708+ebp*1+ebx]
+ mov ebp,edi
+ add ebx,eax
+ and ebp,esi
+ mov eax,DWORD [60+esp]
+ add ebx,ebp
+ ; 40_59 47
+ mov ebp,edx
+ xor eax,DWORD [4+esp]
+ xor ebp,edi
+ xor eax,DWORD [28+esp]
+ and ebp,ecx
+ xor eax,DWORD [48+esp]
+ rol eax,1
+ add ebp,esi
+ ror ecx,2
+ mov esi,ebx
+ rol esi,5
+ mov DWORD [60+esp],eax
+ lea eax,[2400959708+ebp*1+eax]
+ mov ebp,edx
+ add eax,esi
+ and ebp,edi
+ mov esi,DWORD [esp]
+ add eax,ebp
+ ; 40_59 48
+ mov ebp,ecx
+ xor esi,DWORD [8+esp]
+ xor ebp,edx
+ xor esi,DWORD [32+esp]
+ and ebp,ebx
+ xor esi,DWORD [52+esp]
+ rol esi,1
+ add ebp,edi
+ ror ebx,2
+ mov edi,eax
+ rol edi,5
+ mov DWORD [esp],esi
+ lea esi,[2400959708+ebp*1+esi]
+ mov ebp,ecx
+ add esi,edi
+ and ebp,edx
+ mov edi,DWORD [4+esp]
+ add esi,ebp
+ ; 40_59 49
+ mov ebp,ebx
+ xor edi,DWORD [12+esp]
+ xor ebp,ecx
+ xor edi,DWORD [36+esp]
+ and ebp,eax
+ xor edi,DWORD [56+esp]
+ rol edi,1
+ add ebp,edx
+ ror eax,2
+ mov edx,esi
+ rol edx,5
+ mov DWORD [4+esp],edi
+ lea edi,[2400959708+ebp*1+edi]
+ mov ebp,ebx
+ add edi,edx
+ and ebp,ecx
+ mov edx,DWORD [8+esp]
+ add edi,ebp
+ ; 40_59 50
+ mov ebp,eax
+ xor edx,DWORD [16+esp]
+ xor ebp,ebx
+ xor edx,DWORD [40+esp]
+ and ebp,esi
+ xor edx,DWORD [60+esp]
+ rol edx,1
+ add ebp,ecx
+ ror esi,2
+ mov ecx,edi
+ rol ecx,5
+ mov DWORD [8+esp],edx
+ lea edx,[2400959708+ebp*1+edx]
+ mov ebp,eax
+ add edx,ecx
+ and ebp,ebx
+ mov ecx,DWORD [12+esp]
+ add edx,ebp
+ ; 40_59 51
+ mov ebp,esi
+ xor ecx,DWORD [20+esp]
+ xor ebp,eax
+ xor ecx,DWORD [44+esp]
+ and ebp,edi
+ xor ecx,DWORD [esp]
+ rol ecx,1
+ add ebp,ebx
+ ror edi,2
+ mov ebx,edx
+ rol ebx,5
+ mov DWORD [12+esp],ecx
+ lea ecx,[2400959708+ebp*1+ecx]
+ mov ebp,esi
+ add ecx,ebx
+ and ebp,eax
+ mov ebx,DWORD [16+esp]
+ add ecx,ebp
+ ; 40_59 52
+ mov ebp,edi
+ xor ebx,DWORD [24+esp]
+ xor ebp,esi
+ xor ebx,DWORD [48+esp]
+ and ebp,edx
+ xor ebx,DWORD [4+esp]
+ rol ebx,1
+ add ebp,eax
+ ror edx,2
+ mov eax,ecx
+ rol eax,5
+ mov DWORD [16+esp],ebx
+ lea ebx,[2400959708+ebp*1+ebx]
+ mov ebp,edi
+ add ebx,eax
+ and ebp,esi
+ mov eax,DWORD [20+esp]
+ add ebx,ebp
+ ; 40_59 53
+ mov ebp,edx
+ xor eax,DWORD [28+esp]
+ xor ebp,edi
+ xor eax,DWORD [52+esp]
+ and ebp,ecx
+ xor eax,DWORD [8+esp]
+ rol eax,1
+ add ebp,esi
+ ror ecx,2
+ mov esi,ebx
+ rol esi,5
+ mov DWORD [20+esp],eax
+ lea eax,[2400959708+ebp*1+eax]
+ mov ebp,edx
+ add eax,esi
+ and ebp,edi
+ mov esi,DWORD [24+esp]
+ add eax,ebp
+ ; 40_59 54
+ mov ebp,ecx
+ xor esi,DWORD [32+esp]
+ xor ebp,edx
+ xor esi,DWORD [56+esp]
+ and ebp,ebx
+ xor esi,DWORD [12+esp]
+ rol esi,1
+ add ebp,edi
+ ror ebx,2
+ mov edi,eax
+ rol edi,5
+ mov DWORD [24+esp],esi
+ lea esi,[2400959708+ebp*1+esi]
+ mov ebp,ecx
+ add esi,edi
+ and ebp,edx
+ mov edi,DWORD [28+esp]
+ add esi,ebp
+ ; 40_59 55
+ mov ebp,ebx
+ xor edi,DWORD [36+esp]
+ xor ebp,ecx
+ xor edi,DWORD [60+esp]
+ and ebp,eax
+ xor edi,DWORD [16+esp]
+ rol edi,1
+ add ebp,edx
+ ror eax,2
+ mov edx,esi
+ rol edx,5
+ mov DWORD [28+esp],edi
+ lea edi,[2400959708+ebp*1+edi]
+ mov ebp,ebx
+ add edi,edx
+ and ebp,ecx
+ mov edx,DWORD [32+esp]
+ add edi,ebp
+ ; 40_59 56
+ mov ebp,eax
+ xor edx,DWORD [40+esp]
+ xor ebp,ebx
+ xor edx,DWORD [esp]
+ and ebp,esi
+ xor edx,DWORD [20+esp]
+ rol edx,1
+ add ebp,ecx
+ ror esi,2
+ mov ecx,edi
+ rol ecx,5
+ mov DWORD [32+esp],edx
+ lea edx,[2400959708+ebp*1+edx]
+ mov ebp,eax
+ add edx,ecx
+ and ebp,ebx
+ mov ecx,DWORD [36+esp]
+ add edx,ebp
+ ; 40_59 57
+ mov ebp,esi
+ xor ecx,DWORD [44+esp]
+ xor ebp,eax
+ xor ecx,DWORD [4+esp]
+ and ebp,edi
+ xor ecx,DWORD [24+esp]
+ rol ecx,1
+ add ebp,ebx
+ ror edi,2
+ mov ebx,edx
+ rol ebx,5
+ mov DWORD [36+esp],ecx
+ lea ecx,[2400959708+ebp*1+ecx]
+ mov ebp,esi
+ add ecx,ebx
+ and ebp,eax
+ mov ebx,DWORD [40+esp]
+ add ecx,ebp
+ ; 40_59 58
+ mov ebp,edi
+ xor ebx,DWORD [48+esp]
+ xor ebp,esi
+ xor ebx,DWORD [8+esp]
+ and ebp,edx
+ xor ebx,DWORD [28+esp]
+ rol ebx,1
+ add ebp,eax
+ ror edx,2
+ mov eax,ecx
+ rol eax,5
+ mov DWORD [40+esp],ebx
+ lea ebx,[2400959708+ebp*1+ebx]
+ mov ebp,edi
+ add ebx,eax
+ and ebp,esi
+ mov eax,DWORD [44+esp]
+ add ebx,ebp
+ ; 40_59 59
+ mov ebp,edx
+ xor eax,DWORD [52+esp]
+ xor ebp,edi
+ xor eax,DWORD [12+esp]
+ and ebp,ecx
+ xor eax,DWORD [32+esp]
+ rol eax,1
+ add ebp,esi
+ ror ecx,2
+ mov esi,ebx
+ rol esi,5
+ mov DWORD [44+esp],eax
+ lea eax,[2400959708+ebp*1+eax]
+ mov ebp,edx
+ add eax,esi
+ and ebp,edi
+ mov esi,DWORD [48+esp]
+ add eax,ebp
+ ; 20_39 60
+ mov ebp,ebx
+ xor esi,DWORD [56+esp]
+ xor ebp,ecx
+ xor esi,DWORD [16+esp]
+ xor ebp,edx
+ xor esi,DWORD [36+esp]
+ rol esi,1
+ add edi,ebp
+ ror ebx,2
+ mov ebp,eax
+ rol ebp,5
+ mov DWORD [48+esp],esi
+ lea esi,[3395469782+edi*1+esi]
+ mov edi,DWORD [52+esp]
+ add esi,ebp
+ ; 20_39 61
+ mov ebp,eax
+ xor edi,DWORD [60+esp]
+ xor ebp,ebx
+ xor edi,DWORD [20+esp]
+ xor ebp,ecx
+ xor edi,DWORD [40+esp]
+ rol edi,1
+ add edx,ebp
+ ror eax,2
+ mov ebp,esi
+ rol ebp,5
+ mov DWORD [52+esp],edi
+ lea edi,[3395469782+edx*1+edi]
+ mov edx,DWORD [56+esp]
+ add edi,ebp
+ ; 20_39 62
+ mov ebp,esi
+ xor edx,DWORD [esp]
+ xor ebp,eax
+ xor edx,DWORD [24+esp]
+ xor ebp,ebx
+ xor edx,DWORD [44+esp]
+ rol edx,1
+ add ecx,ebp
+ ror esi,2
+ mov ebp,edi
+ rol ebp,5
+ mov DWORD [56+esp],edx
+ lea edx,[3395469782+ecx*1+edx]
+ mov ecx,DWORD [60+esp]
+ add edx,ebp
+ ; 20_39 63
+ mov ebp,edi
+ xor ecx,DWORD [4+esp]
+ xor ebp,esi
+ xor ecx,DWORD [28+esp]
+ xor ebp,eax
+ xor ecx,DWORD [48+esp]
+ rol ecx,1
+ add ebx,ebp
+ ror edi,2
+ mov ebp,edx
+ rol ebp,5
+ mov DWORD [60+esp],ecx
+ lea ecx,[3395469782+ebx*1+ecx]
+ mov ebx,DWORD [esp]
+ add ecx,ebp
+ ; 20_39 64
+ mov ebp,edx
+ xor ebx,DWORD [8+esp]
+ xor ebp,edi
+ xor ebx,DWORD [32+esp]
+ xor ebp,esi
+ xor ebx,DWORD [52+esp]
+ rol ebx,1
+ add eax,ebp
+ ror edx,2
+ mov ebp,ecx
+ rol ebp,5
+ mov DWORD [esp],ebx
+ lea ebx,[3395469782+eax*1+ebx]
+ mov eax,DWORD [4+esp]
+ add ebx,ebp
+ ; 20_39 65
+ mov ebp,ecx
+ xor eax,DWORD [12+esp]
+ xor ebp,edx
+ xor eax,DWORD [36+esp]
+ xor ebp,edi
+ xor eax,DWORD [56+esp]
+ rol eax,1
+ add esi,ebp
+ ror ecx,2
+ mov ebp,ebx
+ rol ebp,5
+ mov DWORD [4+esp],eax
+ lea eax,[3395469782+esi*1+eax]
+ mov esi,DWORD [8+esp]
+ add eax,ebp
+ ; 20_39 66
+ mov ebp,ebx
+ xor esi,DWORD [16+esp]
+ xor ebp,ecx
+ xor esi,DWORD [40+esp]
+ xor ebp,edx
+ xor esi,DWORD [60+esp]
+ rol esi,1
+ add edi,ebp
+ ror ebx,2
+ mov ebp,eax
+ rol ebp,5
+ mov DWORD [8+esp],esi
+ lea esi,[3395469782+edi*1+esi]
+ mov edi,DWORD [12+esp]
+ add esi,ebp
+ ; 20_39 67
+ mov ebp,eax
+ xor edi,DWORD [20+esp]
+ xor ebp,ebx
+ xor edi,DWORD [44+esp]
+ xor ebp,ecx
+ xor edi,DWORD [esp]
+ rol edi,1
+ add edx,ebp
+ ror eax,2
+ mov ebp,esi
+ rol ebp,5
+ mov DWORD [12+esp],edi
+ lea edi,[3395469782+edx*1+edi]
+ mov edx,DWORD [16+esp]
+ add edi,ebp
+ ; 20_39 68
+ mov ebp,esi
+ xor edx,DWORD [24+esp]
+ xor ebp,eax
+ xor edx,DWORD [48+esp]
+ xor ebp,ebx
+ xor edx,DWORD [4+esp]
+ rol edx,1
+ add ecx,ebp
+ ror esi,2
+ mov ebp,edi
+ rol ebp,5
+ mov DWORD [16+esp],edx
+ lea edx,[3395469782+ecx*1+edx]
+ mov ecx,DWORD [20+esp]
+ add edx,ebp
+ ; 20_39 69
+ mov ebp,edi
+ xor ecx,DWORD [28+esp]
+ xor ebp,esi
+ xor ecx,DWORD [52+esp]
+ xor ebp,eax
+ xor ecx,DWORD [8+esp]
+ rol ecx,1
+ add ebx,ebp
+ ror edi,2
+ mov ebp,edx
+ rol ebp,5
+ mov DWORD [20+esp],ecx
+ lea ecx,[3395469782+ebx*1+ecx]
+ mov ebx,DWORD [24+esp]
+ add ecx,ebp
+ ; 20_39 70
+ mov ebp,edx
+ xor ebx,DWORD [32+esp]
+ xor ebp,edi
+ xor ebx,DWORD [56+esp]
+ xor ebp,esi
+ xor ebx,DWORD [12+esp]
+ rol ebx,1
+ add eax,ebp
+ ror edx,2
+ mov ebp,ecx
+ rol ebp,5
+ mov DWORD [24+esp],ebx
+ lea ebx,[3395469782+eax*1+ebx]
+ mov eax,DWORD [28+esp]
+ add ebx,ebp
+ ; 20_39 71
+ mov ebp,ecx
+ xor eax,DWORD [36+esp]
+ xor ebp,edx
+ xor eax,DWORD [60+esp]
+ xor ebp,edi
+ xor eax,DWORD [16+esp]
+ rol eax,1
+ add esi,ebp
+ ror ecx,2
+ mov ebp,ebx
+ rol ebp,5
+ mov DWORD [28+esp],eax
+ lea eax,[3395469782+esi*1+eax]
+ mov esi,DWORD [32+esp]
+ add eax,ebp
+ ; 20_39 72
+ mov ebp,ebx
+ xor esi,DWORD [40+esp]
+ xor ebp,ecx
+ xor esi,DWORD [esp]
+ xor ebp,edx
+ xor esi,DWORD [20+esp]
+ rol esi,1
+ add edi,ebp
+ ror ebx,2
+ mov ebp,eax
+ rol ebp,5
+ mov DWORD [32+esp],esi
+ lea esi,[3395469782+edi*1+esi]
+ mov edi,DWORD [36+esp]
+ add esi,ebp
+ ; 20_39 73
+ mov ebp,eax
+ xor edi,DWORD [44+esp]
+ xor ebp,ebx
+ xor edi,DWORD [4+esp]
+ xor ebp,ecx
+ xor edi,DWORD [24+esp]
+ rol edi,1
+ add edx,ebp
+ ror eax,2
+ mov ebp,esi
+ rol ebp,5
+ mov DWORD [36+esp],edi
+ lea edi,[3395469782+edx*1+edi]
+ mov edx,DWORD [40+esp]
+ add edi,ebp
+ ; 20_39 74
+ mov ebp,esi
+ xor edx,DWORD [48+esp]
+ xor ebp,eax
+ xor edx,DWORD [8+esp]
+ xor ebp,ebx
+ xor edx,DWORD [28+esp]
+ rol edx,1
+ add ecx,ebp
+ ror esi,2
+ mov ebp,edi
+ rol ebp,5
+ mov DWORD [40+esp],edx
+ lea edx,[3395469782+ecx*1+edx]
+ mov ecx,DWORD [44+esp]
+ add edx,ebp
+ ; 20_39 75
+ mov ebp,edi
+ xor ecx,DWORD [52+esp]
+ xor ebp,esi
+ xor ecx,DWORD [12+esp]
+ xor ebp,eax
+ xor ecx,DWORD [32+esp]
+ rol ecx,1
+ add ebx,ebp
+ ror edi,2
+ mov ebp,edx
+ rol ebp,5
+ mov DWORD [44+esp],ecx
+ lea ecx,[3395469782+ebx*1+ecx]
+ mov ebx,DWORD [48+esp]
+ add ecx,ebp
+ ; 20_39 76
+ mov ebp,edx
+ xor ebx,DWORD [56+esp]
+ xor ebp,edi
+ xor ebx,DWORD [16+esp]
+ xor ebp,esi
+ xor ebx,DWORD [36+esp]
+ rol ebx,1
+ add eax,ebp
+ ror edx,2
+ mov ebp,ecx
+ rol ebp,5
+ mov DWORD [48+esp],ebx
+ lea ebx,[3395469782+eax*1+ebx]
+ mov eax,DWORD [52+esp]
+ add ebx,ebp
+ ; 20_39 77
+ mov ebp,ecx
+ xor eax,DWORD [60+esp]
+ xor ebp,edx
+ xor eax,DWORD [20+esp]
+ xor ebp,edi
+ xor eax,DWORD [40+esp]
+ rol eax,1
+ add esi,ebp
+ ror ecx,2
+ mov ebp,ebx
+ rol ebp,5
+ lea eax,[3395469782+esi*1+eax]
+ mov esi,DWORD [56+esp]
+ add eax,ebp
+ ; 20_39 78
+ mov ebp,ebx
+ xor esi,DWORD [esp]
+ xor ebp,ecx
+ xor esi,DWORD [24+esp]
+ xor ebp,edx
+ xor esi,DWORD [44+esp]
+ rol esi,1
+ add edi,ebp
+ ror ebx,2
+ mov ebp,eax
+ rol ebp,5
+ lea esi,[3395469782+edi*1+esi]
+ mov edi,DWORD [60+esp]
+ add esi,ebp
+ ; 20_39 79
+ mov ebp,eax
+ xor edi,DWORD [4+esp]
+ xor ebp,ebx
+ xor edi,DWORD [28+esp]
+ xor ebp,ecx
+ xor edi,DWORD [48+esp]
+ rol edi,1
+ add edx,ebp
+ ror eax,2
+ mov ebp,esi
+ rol ebp,5
+ lea edi,[3395469782+edx*1+edi]
+ add edi,ebp
+ mov ebp,DWORD [96+esp]
+ mov edx,DWORD [100+esp]
+ add edi,DWORD [ebp]
+ add esi,DWORD [4+ebp]
+ add eax,DWORD [8+ebp]
+ add ebx,DWORD [12+ebp]
+ add ecx,DWORD [16+ebp]
+ mov DWORD [ebp],edi
+ add edx,64
+ mov DWORD [4+ebp],esi
+ cmp edx,DWORD [104+esp]
+ mov DWORD [8+ebp],eax
+ mov edi,ecx
+ mov DWORD [12+ebp],ebx
+ mov esi,edx
+ mov DWORD [16+ebp],ecx
+ jb NEAR L$002loop
+ add esp,76
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+align 16
+__sha1_block_data_order_shaext:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ call L$003pic_point
+L$003pic_point:
+ pop ebp
+ lea ebp,[(L$K_XX_XX-L$003pic_point)+ebp]
+L$shaext_shortcut:
+ mov edi,DWORD [20+esp]
+ mov ebx,esp
+ mov esi,DWORD [24+esp]
+ mov ecx,DWORD [28+esp]
+ sub esp,32
+ movdqu xmm0,[edi]
+ movd xmm1,DWORD [16+edi]
+ and esp,-32
+ movdqa xmm3,[80+ebp]
+ movdqu xmm4,[esi]
+ pshufd xmm0,xmm0,27
+ movdqu xmm5,[16+esi]
+ pshufd xmm1,xmm1,27
+ movdqu xmm6,[32+esi]
+db 102,15,56,0,227
+ movdqu xmm7,[48+esi]
+db 102,15,56,0,235
+db 102,15,56,0,243
+db 102,15,56,0,251
+ jmp NEAR L$004loop_shaext
+align 16
+L$004loop_shaext:
+ dec ecx
+ lea eax,[64+esi]
+ movdqa [esp],xmm1
+ paddd xmm1,xmm4
+ cmovne esi,eax
+ movdqa [16+esp],xmm0
+db 15,56,201,229
+ movdqa xmm2,xmm0
+db 15,58,204,193,0
+db 15,56,200,213
+ pxor xmm4,xmm6
+db 15,56,201,238
+db 15,56,202,231
+ movdqa xmm1,xmm0
+db 15,58,204,194,0
+db 15,56,200,206
+ pxor xmm5,xmm7
+db 15,56,202,236
+db 15,56,201,247
+ movdqa xmm2,xmm0
+db 15,58,204,193,0
+db 15,56,200,215
+ pxor xmm6,xmm4
+db 15,56,201,252
+db 15,56,202,245
+ movdqa xmm1,xmm0
+db 15,58,204,194,0
+db 15,56,200,204
+ pxor xmm7,xmm5
+db 15,56,202,254
+db 15,56,201,229
+ movdqa xmm2,xmm0
+db 15,58,204,193,0
+db 15,56,200,213
+ pxor xmm4,xmm6
+db 15,56,201,238
+db 15,56,202,231
+ movdqa xmm1,xmm0
+db 15,58,204,194,1
+db 15,56,200,206
+ pxor xmm5,xmm7
+db 15,56,202,236
+db 15,56,201,247
+ movdqa xmm2,xmm0
+db 15,58,204,193,1
+db 15,56,200,215
+ pxor xmm6,xmm4
+db 15,56,201,252
+db 15,56,202,245
+ movdqa xmm1,xmm0
+db 15,58,204,194,1
+db 15,56,200,204
+ pxor xmm7,xmm5
+db 15,56,202,254
+db 15,56,201,229
+ movdqa xmm2,xmm0
+db 15,58,204,193,1
+db 15,56,200,213
+ pxor xmm4,xmm6
+db 15,56,201,238
+db 15,56,202,231
+ movdqa xmm1,xmm0
+db 15,58,204,194,1
+db 15,56,200,206
+ pxor xmm5,xmm7
+db 15,56,202,236
+db 15,56,201,247
+ movdqa xmm2,xmm0
+db 15,58,204,193,2
+db 15,56,200,215
+ pxor xmm6,xmm4
+db 15,56,201,252
+db 15,56,202,245
+ movdqa xmm1,xmm0
+db 15,58,204,194,2
+db 15,56,200,204
+ pxor xmm7,xmm5
+db 15,56,202,254
+db 15,56,201,229
+ movdqa xmm2,xmm0
+db 15,58,204,193,2
+db 15,56,200,213
+ pxor xmm4,xmm6
+db 15,56,201,238
+db 15,56,202,231
+ movdqa xmm1,xmm0
+db 15,58,204,194,2
+db 15,56,200,206
+ pxor xmm5,xmm7
+db 15,56,202,236
+db 15,56,201,247
+ movdqa xmm2,xmm0
+db 15,58,204,193,2
+db 15,56,200,215
+ pxor xmm6,xmm4
+db 15,56,201,252
+db 15,56,202,245
+ movdqa xmm1,xmm0
+db 15,58,204,194,3
+db 15,56,200,204
+ pxor xmm7,xmm5
+db 15,56,202,254
+ movdqu xmm4,[esi]
+ movdqa xmm2,xmm0
+db 15,58,204,193,3
+db 15,56,200,213
+ movdqu xmm5,[16+esi]
+db 102,15,56,0,227
+ movdqa xmm1,xmm0
+db 15,58,204,194,3
+db 15,56,200,206
+ movdqu xmm6,[32+esi]
+db 102,15,56,0,235
+ movdqa xmm2,xmm0
+db 15,58,204,193,3
+db 15,56,200,215
+ movdqu xmm7,[48+esi]
+db 102,15,56,0,243
+ movdqa xmm1,xmm0
+db 15,58,204,194,3
+ movdqa xmm2,[esp]
+db 102,15,56,0,251
+db 15,56,200,202
+ paddd xmm0,[16+esp]
+ jnz NEAR L$004loop_shaext
+ pshufd xmm0,xmm0,27
+ pshufd xmm1,xmm1,27
+ movdqu [edi],xmm0
+ movd DWORD [16+edi],xmm1
+ mov esp,ebx
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+align 16
+__sha1_block_data_order_ssse3:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ call L$005pic_point
+L$005pic_point:
+ pop ebp
+ lea ebp,[(L$K_XX_XX-L$005pic_point)+ebp]
+L$ssse3_shortcut:
+ movdqa xmm7,[ebp]
+ movdqa xmm0,[16+ebp]
+ movdqa xmm1,[32+ebp]
+ movdqa xmm2,[48+ebp]
+ movdqa xmm6,[64+ebp]
+ mov edi,DWORD [20+esp]
+ mov ebp,DWORD [24+esp]
+ mov edx,DWORD [28+esp]
+ mov esi,esp
+ sub esp,208
+ and esp,-64
+ movdqa [112+esp],xmm0
+ movdqa [128+esp],xmm1
+ movdqa [144+esp],xmm2
+ shl edx,6
+ movdqa [160+esp],xmm7
+ add edx,ebp
+ movdqa [176+esp],xmm6
+ add ebp,64
+ mov DWORD [192+esp],edi
+ mov DWORD [196+esp],ebp
+ mov DWORD [200+esp],edx
+ mov DWORD [204+esp],esi
+ mov eax,DWORD [edi]
+ mov ebx,DWORD [4+edi]
+ mov ecx,DWORD [8+edi]
+ mov edx,DWORD [12+edi]
+ mov edi,DWORD [16+edi]
+ mov esi,ebx
+ movdqu xmm0,[ebp-64]
+ movdqu xmm1,[ebp-48]
+ movdqu xmm2,[ebp-32]
+ movdqu xmm3,[ebp-16]
+db 102,15,56,0,198
+db 102,15,56,0,206
+db 102,15,56,0,214
+ movdqa [96+esp],xmm7
+db 102,15,56,0,222
+ paddd xmm0,xmm7
+ paddd xmm1,xmm7
+ paddd xmm2,xmm7
+ movdqa [esp],xmm0
+ psubd xmm0,xmm7
+ movdqa [16+esp],xmm1
+ psubd xmm1,xmm7
+ movdqa [32+esp],xmm2
+ mov ebp,ecx
+ psubd xmm2,xmm7
+ xor ebp,edx
+ pshufd xmm4,xmm0,238
+ and esi,ebp
+ jmp NEAR L$006loop
+align 16
+L$006loop:
+ ror ebx,2
+ xor esi,edx
+ mov ebp,eax
+ punpcklqdq xmm4,xmm1
+ movdqa xmm6,xmm3
+ add edi,DWORD [esp]
+ xor ebx,ecx
+ paddd xmm7,xmm3
+ movdqa [64+esp],xmm0
+ rol eax,5
+ add edi,esi
+ psrldq xmm6,4
+ and ebp,ebx
+ xor ebx,ecx
+ pxor xmm4,xmm0
+ add edi,eax
+ ror eax,7
+ pxor xmm6,xmm2
+ xor ebp,ecx
+ mov esi,edi
+ add edx,DWORD [4+esp]
+ pxor xmm4,xmm6
+ xor eax,ebx
+ rol edi,5
+ movdqa [48+esp],xmm7
+ add edx,ebp
+ and esi,eax
+ movdqa xmm0,xmm4
+ xor eax,ebx
+ add edx,edi
+ ror edi,7
+ movdqa xmm6,xmm4
+ xor esi,ebx
+ pslldq xmm0,12
+ paddd xmm4,xmm4
+ mov ebp,edx
+ add ecx,DWORD [8+esp]
+ psrld xmm6,31
+ xor edi,eax
+ rol edx,5
+ movdqa xmm7,xmm0
+ add ecx,esi
+ and ebp,edi
+ xor edi,eax
+ psrld xmm0,30
+ add ecx,edx
+ ror edx,7
+ por xmm4,xmm6
+ xor ebp,eax
+ mov esi,ecx
+ add ebx,DWORD [12+esp]
+ pslld xmm7,2
+ xor edx,edi
+ rol ecx,5
+ pxor xmm4,xmm0
+ movdqa xmm0,[96+esp]
+ add ebx,ebp
+ and esi,edx
+ pxor xmm4,xmm7
+ pshufd xmm5,xmm1,238
+ xor edx,edi
+ add ebx,ecx
+ ror ecx,7
+ xor esi,edi
+ mov ebp,ebx
+ punpcklqdq xmm5,xmm2
+ movdqa xmm7,xmm4
+ add eax,DWORD [16+esp]
+ xor ecx,edx
+ paddd xmm0,xmm4
+ movdqa [80+esp],xmm1
+ rol ebx,5
+ add eax,esi
+ psrldq xmm7,4
+ and ebp,ecx
+ xor ecx,edx
+ pxor xmm5,xmm1
+ add eax,ebx
+ ror ebx,7
+ pxor xmm7,xmm3
+ xor ebp,edx
+ mov esi,eax
+ add edi,DWORD [20+esp]
+ pxor xmm5,xmm7
+ xor ebx,ecx
+ rol eax,5
+ movdqa [esp],xmm0
+ add edi,ebp
+ and esi,ebx
+ movdqa xmm1,xmm5
+ xor ebx,ecx
+ add edi,eax
+ ror eax,7
+ movdqa xmm7,xmm5
+ xor esi,ecx
+ pslldq xmm1,12
+ paddd xmm5,xmm5
+ mov ebp,edi
+ add edx,DWORD [24+esp]
+ psrld xmm7,31
+ xor eax,ebx
+ rol edi,5
+ movdqa xmm0,xmm1
+ add edx,esi
+ and ebp,eax
+ xor eax,ebx
+ psrld xmm1,30
+ add edx,edi
+ ror edi,7
+ por xmm5,xmm7
+ xor ebp,ebx
+ mov esi,edx
+ add ecx,DWORD [28+esp]
+ pslld xmm0,2
+ xor edi,eax
+ rol edx,5
+ pxor xmm5,xmm1
+ movdqa xmm1,[112+esp]
+ add ecx,ebp
+ and esi,edi
+ pxor xmm5,xmm0
+ pshufd xmm6,xmm2,238
+ xor edi,eax
+ add ecx,edx
+ ror edx,7
+ xor esi,eax
+ mov ebp,ecx
+ punpcklqdq xmm6,xmm3
+ movdqa xmm0,xmm5
+ add ebx,DWORD [32+esp]
+ xor edx,edi
+ paddd xmm1,xmm5
+ movdqa [96+esp],xmm2
+ rol ecx,5
+ add ebx,esi
+ psrldq xmm0,4
+ and ebp,edx
+ xor edx,edi
+ pxor xmm6,xmm2
+ add ebx,ecx
+ ror ecx,7
+ pxor xmm0,xmm4
+ xor ebp,edi
+ mov esi,ebx
+ add eax,DWORD [36+esp]
+ pxor xmm6,xmm0
+ xor ecx,edx
+ rol ebx,5
+ movdqa [16+esp],xmm1
+ add eax,ebp
+ and esi,ecx
+ movdqa xmm2,xmm6
+ xor ecx,edx
+ add eax,ebx
+ ror ebx,7
+ movdqa xmm0,xmm6
+ xor esi,edx
+ pslldq xmm2,12
+ paddd xmm6,xmm6
+ mov ebp,eax
+ add edi,DWORD [40+esp]
+ psrld xmm0,31
+ xor ebx,ecx
+ rol eax,5
+ movdqa xmm1,xmm2
+ add edi,esi
+ and ebp,ebx
+ xor ebx,ecx
+ psrld xmm2,30
+ add edi,eax
+ ror eax,7
+ por xmm6,xmm0
+ xor ebp,ecx
+ movdqa xmm0,[64+esp]
+ mov esi,edi
+ add edx,DWORD [44+esp]
+ pslld xmm1,2
+ xor eax,ebx
+ rol edi,5
+ pxor xmm6,xmm2
+ movdqa xmm2,[112+esp]
+ add edx,ebp
+ and esi,eax
+ pxor xmm6,xmm1
+ pshufd xmm7,xmm3,238
+ xor eax,ebx
+ add edx,edi
+ ror edi,7
+ xor esi,ebx
+ mov ebp,edx
+ punpcklqdq xmm7,xmm4
+ movdqa xmm1,xmm6
+ add ecx,DWORD [48+esp]
+ xor edi,eax
+ paddd xmm2,xmm6
+ movdqa [64+esp],xmm3
+ rol edx,5
+ add ecx,esi
+ psrldq xmm1,4
+ and ebp,edi
+ xor edi,eax
+ pxor xmm7,xmm3
+ add ecx,edx
+ ror edx,7
+ pxor xmm1,xmm5
+ xor ebp,eax
+ mov esi,ecx
+ add ebx,DWORD [52+esp]
+ pxor xmm7,xmm1
+ xor edx,edi
+ rol ecx,5
+ movdqa [32+esp],xmm2
+ add ebx,ebp
+ and esi,edx
+ movdqa xmm3,xmm7
+ xor edx,edi
+ add ebx,ecx
+ ror ecx,7
+ movdqa xmm1,xmm7
+ xor esi,edi
+ pslldq xmm3,12
+ paddd xmm7,xmm7
+ mov ebp,ebx
+ add eax,DWORD [56+esp]
+ psrld xmm1,31
+ xor ecx,edx
+ rol ebx,5
+ movdqa xmm2,xmm3
+ add eax,esi
+ and ebp,ecx
+ xor ecx,edx
+ psrld xmm3,30
+ add eax,ebx
+ ror ebx,7
+ por xmm7,xmm1
+ xor ebp,edx
+ movdqa xmm1,[80+esp]
+ mov esi,eax
+ add edi,DWORD [60+esp]
+ pslld xmm2,2
+ xor ebx,ecx
+ rol eax,5
+ pxor xmm7,xmm3
+ movdqa xmm3,[112+esp]
+ add edi,ebp
+ and esi,ebx
+ pxor xmm7,xmm2
+ pshufd xmm2,xmm6,238
+ xor ebx,ecx
+ add edi,eax
+ ror eax,7
+ pxor xmm0,xmm4
+ punpcklqdq xmm2,xmm7
+ xor esi,ecx
+ mov ebp,edi
+ add edx,DWORD [esp]
+ pxor xmm0,xmm1
+ movdqa [80+esp],xmm4
+ xor eax,ebx
+ rol edi,5
+ movdqa xmm4,xmm3
+ add edx,esi
+ paddd xmm3,xmm7
+ and ebp,eax
+ pxor xmm0,xmm2
+ xor eax,ebx
+ add edx,edi
+ ror edi,7
+ xor ebp,ebx
+ movdqa xmm2,xmm0
+ movdqa [48+esp],xmm3
+ mov esi,edx
+ add ecx,DWORD [4+esp]
+ xor edi,eax
+ rol edx,5
+ pslld xmm0,2
+ add ecx,ebp
+ and esi,edi
+ psrld xmm2,30
+ xor edi,eax
+ add ecx,edx
+ ror edx,7
+ xor esi,eax
+ mov ebp,ecx
+ add ebx,DWORD [8+esp]
+ xor edx,edi
+ rol ecx,5
+ por xmm0,xmm2
+ add ebx,esi
+ and ebp,edx
+ movdqa xmm2,[96+esp]
+ xor edx,edi
+ add ebx,ecx
+ add eax,DWORD [12+esp]
+ xor ebp,edi
+ mov esi,ebx
+ pshufd xmm3,xmm7,238
+ rol ebx,5
+ add eax,ebp
+ xor esi,edx
+ ror ecx,7
+ add eax,ebx
+ add edi,DWORD [16+esp]
+ pxor xmm1,xmm5
+ punpcklqdq xmm3,xmm0
+ xor esi,ecx
+ mov ebp,eax
+ rol eax,5
+ pxor xmm1,xmm2
+ movdqa [96+esp],xmm5
+ add edi,esi
+ xor ebp,ecx
+ movdqa xmm5,xmm4
+ ror ebx,7
+ paddd xmm4,xmm0
+ add edi,eax
+ pxor xmm1,xmm3
+ add edx,DWORD [20+esp]
+ xor ebp,ebx
+ mov esi,edi
+ rol edi,5
+ movdqa xmm3,xmm1
+ movdqa [esp],xmm4
+ add edx,ebp
+ xor esi,ebx
+ ror eax,7
+ add edx,edi
+ pslld xmm1,2
+ add ecx,DWORD [24+esp]
+ xor esi,eax
+ psrld xmm3,30
+ mov ebp,edx
+ rol edx,5
+ add ecx,esi
+ xor ebp,eax
+ ror edi,7
+ add ecx,edx
+ por xmm1,xmm3
+ add ebx,DWORD [28+esp]
+ xor ebp,edi
+ movdqa xmm3,[64+esp]
+ mov esi,ecx
+ rol ecx,5
+ add ebx,ebp
+ xor esi,edi
+ ror edx,7
+ pshufd xmm4,xmm0,238
+ add ebx,ecx
+ add eax,DWORD [32+esp]
+ pxor xmm2,xmm6
+ punpcklqdq xmm4,xmm1
+ xor esi,edx
+ mov ebp,ebx
+ rol ebx,5
+ pxor xmm2,xmm3
+ movdqa [64+esp],xmm6
+ add eax,esi
+ xor ebp,edx
+ movdqa xmm6,[128+esp]
+ ror ecx,7
+ paddd xmm5,xmm1
+ add eax,ebx
+ pxor xmm2,xmm4
+ add edi,DWORD [36+esp]
+ xor ebp,ecx
+ mov esi,eax
+ rol eax,5
+ movdqa xmm4,xmm2
+ movdqa [16+esp],xmm5
+ add edi,ebp
+ xor esi,ecx
+ ror ebx,7
+ add edi,eax
+ pslld xmm2,2
+ add edx,DWORD [40+esp]
+ xor esi,ebx
+ psrld xmm4,30
+ mov ebp,edi
+ rol edi,5
+ add edx,esi
+ xor ebp,ebx
+ ror eax,7
+ add edx,edi
+ por xmm2,xmm4
+ add ecx,DWORD [44+esp]
+ xor ebp,eax
+ movdqa xmm4,[80+esp]
+ mov esi,edx
+ rol edx,5
+ add ecx,ebp
+ xor esi,eax
+ ror edi,7
+ pshufd xmm5,xmm1,238
+ add ecx,edx
+ add ebx,DWORD [48+esp]
+ pxor xmm3,xmm7
+ punpcklqdq xmm5,xmm2
+ xor esi,edi
+ mov ebp,ecx
+ rol ecx,5
+ pxor xmm3,xmm4
+ movdqa [80+esp],xmm7
+ add ebx,esi
+ xor ebp,edi
+ movdqa xmm7,xmm6
+ ror edx,7
+ paddd xmm6,xmm2
+ add ebx,ecx
+ pxor xmm3,xmm5
+ add eax,DWORD [52+esp]
+ xor ebp,edx
+ mov esi,ebx
+ rol ebx,5
+ movdqa xmm5,xmm3
+ movdqa [32+esp],xmm6
+ add eax,ebp
+ xor esi,edx
+ ror ecx,7
+ add eax,ebx
+ pslld xmm3,2
+ add edi,DWORD [56+esp]
+ xor esi,ecx
+ psrld xmm5,30
+ mov ebp,eax
+ rol eax,5
+ add edi,esi
+ xor ebp,ecx
+ ror ebx,7
+ add edi,eax
+ por xmm3,xmm5
+ add edx,DWORD [60+esp]
+ xor ebp,ebx
+ movdqa xmm5,[96+esp]
+ mov esi,edi
+ rol edi,5
+ add edx,ebp
+ xor esi,ebx
+ ror eax,7
+ pshufd xmm6,xmm2,238
+ add edx,edi
+ add ecx,DWORD [esp]
+ pxor xmm4,xmm0
+ punpcklqdq xmm6,xmm3
+ xor esi,eax
+ mov ebp,edx
+ rol edx,5
+ pxor xmm4,xmm5
+ movdqa [96+esp],xmm0
+ add ecx,esi
+ xor ebp,eax
+ movdqa xmm0,xmm7
+ ror edi,7
+ paddd xmm7,xmm3
+ add ecx,edx
+ pxor xmm4,xmm6
+ add ebx,DWORD [4+esp]
+ xor ebp,edi
+ mov esi,ecx
+ rol ecx,5
+ movdqa xmm6,xmm4
+ movdqa [48+esp],xmm7
+ add ebx,ebp
+ xor esi,edi
+ ror edx,7
+ add ebx,ecx
+ pslld xmm4,2
+ add eax,DWORD [8+esp]
+ xor esi,edx
+ psrld xmm6,30
+ mov ebp,ebx
+ rol ebx,5
+ add eax,esi
+ xor ebp,edx
+ ror ecx,7
+ add eax,ebx
+ por xmm4,xmm6
+ add edi,DWORD [12+esp]
+ xor ebp,ecx
+ movdqa xmm6,[64+esp]
+ mov esi,eax
+ rol eax,5
+ add edi,ebp
+ xor esi,ecx
+ ror ebx,7
+ pshufd xmm7,xmm3,238
+ add edi,eax
+ add edx,DWORD [16+esp]
+ pxor xmm5,xmm1
+ punpcklqdq xmm7,xmm4
+ xor esi,ebx
+ mov ebp,edi
+ rol edi,5
+ pxor xmm5,xmm6
+ movdqa [64+esp],xmm1
+ add edx,esi
+ xor ebp,ebx
+ movdqa xmm1,xmm0
+ ror eax,7
+ paddd xmm0,xmm4
+ add edx,edi
+ pxor xmm5,xmm7
+ add ecx,DWORD [20+esp]
+ xor ebp,eax
+ mov esi,edx
+ rol edx,5
+ movdqa xmm7,xmm5
+ movdqa [esp],xmm0
+ add ecx,ebp
+ xor esi,eax
+ ror edi,7
+ add ecx,edx
+ pslld xmm5,2
+ add ebx,DWORD [24+esp]
+ xor esi,edi
+ psrld xmm7,30
+ mov ebp,ecx
+ rol ecx,5
+ add ebx,esi
+ xor ebp,edi
+ ror edx,7
+ add ebx,ecx
+ por xmm5,xmm7
+ add eax,DWORD [28+esp]
+ movdqa xmm7,[80+esp]
+ ror ecx,7
+ mov esi,ebx
+ xor ebp,edx
+ rol ebx,5
+ pshufd xmm0,xmm4,238
+ add eax,ebp
+ xor esi,ecx
+ xor ecx,edx
+ add eax,ebx
+ add edi,DWORD [32+esp]
+ pxor xmm6,xmm2
+ punpcklqdq xmm0,xmm5
+ and esi,ecx
+ xor ecx,edx
+ ror ebx,7
+ pxor xmm6,xmm7
+ movdqa [80+esp],xmm2
+ mov ebp,eax
+ xor esi,ecx
+ rol eax,5
+ movdqa xmm2,xmm1
+ add edi,esi
+ paddd xmm1,xmm5
+ xor ebp,ebx
+ pxor xmm6,xmm0
+ xor ebx,ecx
+ add edi,eax
+ add edx,DWORD [36+esp]
+ and ebp,ebx
+ movdqa xmm0,xmm6
+ movdqa [16+esp],xmm1
+ xor ebx,ecx
+ ror eax,7
+ mov esi,edi
+ xor ebp,ebx
+ rol edi,5
+ pslld xmm6,2
+ add edx,ebp
+ xor esi,eax
+ psrld xmm0,30
+ xor eax,ebx
+ add edx,edi
+ add ecx,DWORD [40+esp]
+ and esi,eax
+ xor eax,ebx
+ ror edi,7
+ por xmm6,xmm0
+ mov ebp,edx
+ xor esi,eax
+ movdqa xmm0,[96+esp]
+ rol edx,5
+ add ecx,esi
+ xor ebp,edi
+ xor edi,eax
+ add ecx,edx
+ pshufd xmm1,xmm5,238
+ add ebx,DWORD [44+esp]
+ and ebp,edi
+ xor edi,eax
+ ror edx,7
+ mov esi,ecx
+ xor ebp,edi
+ rol ecx,5
+ add ebx,ebp
+ xor esi,edx
+ xor edx,edi
+ add ebx,ecx
+ add eax,DWORD [48+esp]
+ pxor xmm7,xmm3
+ punpcklqdq xmm1,xmm6
+ and esi,edx
+ xor edx,edi
+ ror ecx,7
+ pxor xmm7,xmm0
+ movdqa [96+esp],xmm3
+ mov ebp,ebx
+ xor esi,edx
+ rol ebx,5
+ movdqa xmm3,[144+esp]
+ add eax,esi
+ paddd xmm2,xmm6
+ xor ebp,ecx
+ pxor xmm7,xmm1
+ xor ecx,edx
+ add eax,ebx
+ add edi,DWORD [52+esp]
+ and ebp,ecx
+ movdqa xmm1,xmm7
+ movdqa [32+esp],xmm2
+ xor ecx,edx
+ ror ebx,7
+ mov esi,eax
+ xor ebp,ecx
+ rol eax,5
+ pslld xmm7,2
+ add edi,ebp
+ xor esi,ebx
+ psrld xmm1,30
+ xor ebx,ecx
+ add edi,eax
+ add edx,DWORD [56+esp]
+ and esi,ebx
+ xor ebx,ecx
+ ror eax,7
+ por xmm7,xmm1
+ mov ebp,edi
+ xor esi,ebx
+ movdqa xmm1,[64+esp]
+ rol edi,5
+ add edx,esi
+ xor ebp,eax
+ xor eax,ebx
+ add edx,edi
+ pshufd xmm2,xmm6,238
+ add ecx,DWORD [60+esp]
+ and ebp,eax
+ xor eax,ebx
+ ror edi,7
+ mov esi,edx
+ xor ebp,eax
+ rol edx,5
+ add ecx,ebp
+ xor esi,edi
+ xor edi,eax
+ add ecx,edx
+ add ebx,DWORD [esp]
+ pxor xmm0,xmm4
+ punpcklqdq xmm2,xmm7
+ and esi,edi
+ xor edi,eax
+ ror edx,7
+ pxor xmm0,xmm1
+ movdqa [64+esp],xmm4
+ mov ebp,ecx
+ xor esi,edi
+ rol ecx,5
+ movdqa xmm4,xmm3
+ add ebx,esi
+ paddd xmm3,xmm7
+ xor ebp,edx
+ pxor xmm0,xmm2
+ xor edx,edi
+ add ebx,ecx
+ add eax,DWORD [4+esp]
+ and ebp,edx
+ movdqa xmm2,xmm0
+ movdqa [48+esp],xmm3
+ xor edx,edi
+ ror ecx,7
+ mov esi,ebx
+ xor ebp,edx
+ rol ebx,5
+ pslld xmm0,2
+ add eax,ebp
+ xor esi,ecx
+ psrld xmm2,30
+ xor ecx,edx
+ add eax,ebx
+ add edi,DWORD [8+esp]
+ and esi,ecx
+ xor ecx,edx
+ ror ebx,7
+ por xmm0,xmm2
+ mov ebp,eax
+ xor esi,ecx
+ movdqa xmm2,[80+esp]
+ rol eax,5
+ add edi,esi
+ xor ebp,ebx
+ xor ebx,ecx
+ add edi,eax
+ pshufd xmm3,xmm7,238
+ add edx,DWORD [12+esp]
+ and ebp,ebx
+ xor ebx,ecx
+ ror eax,7
+ mov esi,edi
+ xor ebp,ebx
+ rol edi,5
+ add edx,ebp
+ xor esi,eax
+ xor eax,ebx
+ add edx,edi
+ add ecx,DWORD [16+esp]
+ pxor xmm1,xmm5
+ punpcklqdq xmm3,xmm0
+ and esi,eax
+ xor eax,ebx
+ ror edi,7
+ pxor xmm1,xmm2
+ movdqa [80+esp],xmm5
+ mov ebp,edx
+ xor esi,eax
+ rol edx,5
+ movdqa xmm5,xmm4
+ add ecx,esi
+ paddd xmm4,xmm0
+ xor ebp,edi
+ pxor xmm1,xmm3
+ xor edi,eax
+ add ecx,edx
+ add ebx,DWORD [20+esp]
+ and ebp,edi
+ movdqa xmm3,xmm1
+ movdqa [esp],xmm4
+ xor edi,eax
+ ror edx,7
+ mov esi,ecx
+ xor ebp,edi
+ rol ecx,5
+ pslld xmm1,2
+ add ebx,ebp
+ xor esi,edx
+ psrld xmm3,30
+ xor edx,edi
+ add ebx,ecx
+ add eax,DWORD [24+esp]
+ and esi,edx
+ xor edx,edi
+ ror ecx,7
+ por xmm1,xmm3
+ mov ebp,ebx
+ xor esi,edx
+ movdqa xmm3,[96+esp]
+ rol ebx,5
+ add eax,esi
+ xor ebp,ecx
+ xor ecx,edx
+ add eax,ebx
+ pshufd xmm4,xmm0,238
+ add edi,DWORD [28+esp]
+ and ebp,ecx
+ xor ecx,edx
+ ror ebx,7
+ mov esi,eax
+ xor ebp,ecx
+ rol eax,5
+ add edi,ebp
+ xor esi,ebx
+ xor ebx,ecx
+ add edi,eax
+ add edx,DWORD [32+esp]
+ pxor xmm2,xmm6
+ punpcklqdq xmm4,xmm1
+ and esi,ebx
+ xor ebx,ecx
+ ror eax,7
+ pxor xmm2,xmm3
+ movdqa [96+esp],xmm6
+ mov ebp,edi
+ xor esi,ebx
+ rol edi,5
+ movdqa xmm6,xmm5
+ add edx,esi
+ paddd xmm5,xmm1
+ xor ebp,eax
+ pxor xmm2,xmm4
+ xor eax,ebx
+ add edx,edi
+ add ecx,DWORD [36+esp]
+ and ebp,eax
+ movdqa xmm4,xmm2
+ movdqa [16+esp],xmm5
+ xor eax,ebx
+ ror edi,7
+ mov esi,edx
+ xor ebp,eax
+ rol edx,5
+ pslld xmm2,2
+ add ecx,ebp
+ xor esi,edi
+ psrld xmm4,30
+ xor edi,eax
+ add ecx,edx
+ add ebx,DWORD [40+esp]
+ and esi,edi
+ xor edi,eax
+ ror edx,7
+ por xmm2,xmm4
+ mov ebp,ecx
+ xor esi,edi
+ movdqa xmm4,[64+esp]
+ rol ecx,5
+ add ebx,esi
+ xor ebp,edx
+ xor edx,edi
+ add ebx,ecx
+ pshufd xmm5,xmm1,238
+ add eax,DWORD [44+esp]
+ and ebp,edx
+ xor edx,edi
+ ror ecx,7
+ mov esi,ebx
+ xor ebp,edx
+ rol ebx,5
+ add eax,ebp
+ xor esi,edx
+ add eax,ebx
+ add edi,DWORD [48+esp]
+ pxor xmm3,xmm7
+ punpcklqdq xmm5,xmm2
+ xor esi,ecx
+ mov ebp,eax
+ rol eax,5
+ pxor xmm3,xmm4
+ movdqa [64+esp],xmm7
+ add edi,esi
+ xor ebp,ecx
+ movdqa xmm7,xmm6
+ ror ebx,7
+ paddd xmm6,xmm2
+ add edi,eax
+ pxor xmm3,xmm5
+ add edx,DWORD [52+esp]
+ xor ebp,ebx
+ mov esi,edi
+ rol edi,5
+ movdqa xmm5,xmm3
+ movdqa [32+esp],xmm6
+ add edx,ebp
+ xor esi,ebx
+ ror eax,7
+ add edx,edi
+ pslld xmm3,2
+ add ecx,DWORD [56+esp]
+ xor esi,eax
+ psrld xmm5,30
+ mov ebp,edx
+ rol edx,5
+ add ecx,esi
+ xor ebp,eax
+ ror edi,7
+ add ecx,edx
+ por xmm3,xmm5
+ add ebx,DWORD [60+esp]
+ xor ebp,edi
+ mov esi,ecx
+ rol ecx,5
+ add ebx,ebp
+ xor esi,edi
+ ror edx,7
+ add ebx,ecx
+ add eax,DWORD [esp]
+ xor esi,edx
+ mov ebp,ebx
+ rol ebx,5
+ add eax,esi
+ xor ebp,edx
+ ror ecx,7
+ paddd xmm7,xmm3
+ add eax,ebx
+ add edi,DWORD [4+esp]
+ xor ebp,ecx
+ mov esi,eax
+ movdqa [48+esp],xmm7
+ rol eax,5
+ add edi,ebp
+ xor esi,ecx
+ ror ebx,7
+ add edi,eax
+ add edx,DWORD [8+esp]
+ xor esi,ebx
+ mov ebp,edi
+ rol edi,5
+ add edx,esi
+ xor ebp,ebx
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD [12+esp]
+ xor ebp,eax
+ mov esi,edx
+ rol edx,5
+ add ecx,ebp
+ xor esi,eax
+ ror edi,7
+ add ecx,edx
+ mov ebp,DWORD [196+esp]
+ cmp ebp,DWORD [200+esp]
+ je NEAR L$007done
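+; More input remains: reload the byte-swap mask (xmm6) and round constant
+; (xmm7), fetch the next 64-byte block, byte-swap it with pshufb (emitted as
+; raw db 102,15,56,0,... bytes) and pre-add the constant (paddd, undone later
+; with psubd) while the final rounds of the current block complete.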
+ movdqa xmm7,[160+esp]
+ movdqa xmm6,[176+esp]
+ movdqu xmm0,[ebp]
+ movdqu xmm1,[16+ebp]
+ movdqu xmm2,[32+ebp]
+ movdqu xmm3,[48+ebp]
+ add ebp,64
+db 102,15,56,0,198
+ mov DWORD [196+esp],ebp
+ movdqa [96+esp],xmm7
+ add ebx,DWORD [16+esp]
+ xor esi,edi
+ mov ebp,ecx
+ rol ecx,5
+ add ebx,esi
+ xor ebp,edi
+ ror edx,7
+db 102,15,56,0,206
+ add ebx,ecx
+ add eax,DWORD [20+esp]
+ xor ebp,edx
+ mov esi,ebx
+ paddd xmm0,xmm7
+ rol ebx,5
+ add eax,ebp
+ xor esi,edx
+ ror ecx,7
+ movdqa [esp],xmm0
+ add eax,ebx
+ add edi,DWORD [24+esp]
+ xor esi,ecx
+ mov ebp,eax
+ psubd xmm0,xmm7
+ rol eax,5
+ add edi,esi
+ xor ebp,ecx
+ ror ebx,7
+ add edi,eax
+ add edx,DWORD [28+esp]
+ xor ebp,ebx
+ mov esi,edi
+ rol edi,5
+ add edx,ebp
+ xor esi,ebx
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD [32+esp]
+ xor esi,eax
+ mov ebp,edx
+ rol edx,5
+ add ecx,esi
+ xor ebp,eax
+ ror edi,7
+db 102,15,56,0,214
+ add ecx,edx
+ add ebx,DWORD [36+esp]
+ xor ebp,edi
+ mov esi,ecx
+ paddd xmm1,xmm7
+ rol ecx,5
+ add ebx,ebp
+ xor esi,edi
+ ror edx,7
+ movdqa [16+esp],xmm1
+ add ebx,ecx
+ add eax,DWORD [40+esp]
+ xor esi,edx
+ mov ebp,ebx
+ psubd xmm1,xmm7
+ rol ebx,5
+ add eax,esi
+ xor ebp,edx
+ ror ecx,7
+ add eax,ebx
+ add edi,DWORD [44+esp]
+ xor ebp,ecx
+ mov esi,eax
+ rol eax,5
+ add edi,ebp
+ xor esi,ecx
+ ror ebx,7
+ add edi,eax
+ add edx,DWORD [48+esp]
+ xor esi,ebx
+ mov ebp,edi
+ rol edi,5
+ add edx,esi
+ xor ebp,ebx
+ ror eax,7
+db 102,15,56,0,222
+ add edx,edi
+ add ecx,DWORD [52+esp]
+ xor ebp,eax
+ mov esi,edx
+ paddd xmm2,xmm7
+ rol edx,5
+ add ecx,ebp
+ xor esi,eax
+ ror edi,7
+ movdqa [32+esp],xmm2
+ add ecx,edx
+ add ebx,DWORD [56+esp]
+ xor esi,edi
+ mov ebp,ecx
+ psubd xmm2,xmm7
+ rol ecx,5
+ add ebx,esi
+ xor ebp,edi
+ ror edx,7
+ add ebx,ecx
+ add eax,DWORD [60+esp]
+ xor ebp,edx
+ mov esi,ebx
+ rol ebx,5
+ add eax,ebp
+ ror ecx,7
+ add eax,ebx
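+; Fold the working variables back into the hash state (pointer saved at
+; [192+esp]) and loop back for the next block.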
+ mov ebp,DWORD [192+esp]
+ add eax,DWORD [ebp]
+ add esi,DWORD [4+ebp]
+ add ecx,DWORD [8+ebp]
+ mov DWORD [ebp],eax
+ add edx,DWORD [12+ebp]
+ mov DWORD [4+ebp],esi
+ add edi,DWORD [16+ebp]
+ mov DWORD [8+ebp],ecx
+ mov ebx,ecx
+ mov DWORD [12+ebp],edx
+ xor ebx,edx
+ mov DWORD [16+ebp],edi
+ mov ebp,esi
+ pshufd xmm4,xmm0,238
+ and esi,ebx
+ mov ebx,ebp
+ jmp NEAR L$006loop
+align 16
+L$007done:
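+; Last block: finish the remaining rounds without scheduling another block,
+; add the working variables into the state, restore the stack and return.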
+ add ebx,DWORD [16+esp]
+ xor esi,edi
+ mov ebp,ecx
+ rol ecx,5
+ add ebx,esi
+ xor ebp,edi
+ ror edx,7
+ add ebx,ecx
+ add eax,DWORD [20+esp]
+ xor ebp,edx
+ mov esi,ebx
+ rol ebx,5
+ add eax,ebp
+ xor esi,edx
+ ror ecx,7
+ add eax,ebx
+ add edi,DWORD [24+esp]
+ xor esi,ecx
+ mov ebp,eax
+ rol eax,5
+ add edi,esi
+ xor ebp,ecx
+ ror ebx,7
+ add edi,eax
+ add edx,DWORD [28+esp]
+ xor ebp,ebx
+ mov esi,edi
+ rol edi,5
+ add edx,ebp
+ xor esi,ebx
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD [32+esp]
+ xor esi,eax
+ mov ebp,edx
+ rol edx,5
+ add ecx,esi
+ xor ebp,eax
+ ror edi,7
+ add ecx,edx
+ add ebx,DWORD [36+esp]
+ xor ebp,edi
+ mov esi,ecx
+ rol ecx,5
+ add ebx,ebp
+ xor esi,edi
+ ror edx,7
+ add ebx,ecx
+ add eax,DWORD [40+esp]
+ xor esi,edx
+ mov ebp,ebx
+ rol ebx,5
+ add eax,esi
+ xor ebp,edx
+ ror ecx,7
+ add eax,ebx
+ add edi,DWORD [44+esp]
+ xor ebp,ecx
+ mov esi,eax
+ rol eax,5
+ add edi,ebp
+ xor esi,ecx
+ ror ebx,7
+ add edi,eax
+ add edx,DWORD [48+esp]
+ xor esi,ebx
+ mov ebp,edi
+ rol edi,5
+ add edx,esi
+ xor ebp,ebx
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD [52+esp]
+ xor ebp,eax
+ mov esi,edx
+ rol edx,5
+ add ecx,ebp
+ xor esi,eax
+ ror edi,7
+ add ecx,edx
+ add ebx,DWORD [56+esp]
+ xor esi,edi
+ mov ebp,ecx
+ rol ecx,5
+ add ebx,esi
+ xor ebp,edi
+ ror edx,7
+ add ebx,ecx
+ add eax,DWORD [60+esp]
+ xor ebp,edx
+ mov esi,ebx
+ rol ebx,5
+ add eax,ebp
+ ror ecx,7
+ add eax,ebx
+ mov ebp,DWORD [192+esp]
+ add eax,DWORD [ebp]
+ mov esp,DWORD [204+esp]
+ add esi,DWORD [4+ebp]
+ add ecx,DWORD [8+ebp]
+ mov DWORD [ebp],eax
+ add edx,DWORD [12+ebp]
+ mov DWORD [4+ebp],esi
+ add edi,DWORD [16+ebp]
+ mov DWORD [8+ebp],ecx
+ mov DWORD [12+ebp],edx
+ mov DWORD [16+ebp],edi
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+align 64
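+; SHA-1 round constants (0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6),
+; byte-order shuffle masks and the CRYPTOGAMS banner string.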
+L$K_XX_XX:
+dd 1518500249,1518500249,1518500249,1518500249
+dd 1859775393,1859775393,1859775393,1859775393
+dd 2400959708,2400959708,2400959708,2400959708
+dd 3395469782,3395469782,3395469782,3395469782
+dd 66051,67438087,134810123,202182159
+db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+db 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
+db 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
+db 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
+db 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+segment .bss
+common _OPENSSL_ia32cap_P 16
diff --git a/win-x86/crypto/sha/sha256-586.asm b/win-x86/crypto/sha/sha256-586.asm
new file mode 100644
index 0000000..fe36bc5
--- /dev/null
+++ b/win-x86/crypto/sha/sha256-586.asm
@@ -0,0 +1,4591 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+%ifdef __YASM_VERSION_ID__
+%if __YASM_VERSION_ID__ < 01010000h
+%error yasm version 1.1.0 or later needed.
+%endif
+; Yasm automatically includes .00 and complains about redefining it.
+; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
+%else
+$@feat.00 equ 1
+%endif
+section .text code align=64
+%else
+section .text code
+%endif
+;extern _OPENSSL_ia32cap_P
+global _sha256_block_data_order
+align 16
+_sha256_block_data_order:
+L$_sha256_block_data_order_begin:
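+; cdecl arguments: state pointer (eight 32-bit words), input pointer and
+; 64-byte block count; they are loaded into esi/edi/eax below and the count
+; is converted into an end-of-input pointer (shl eax,6; add eax,edi).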
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov esi,DWORD [20+esp]
+ mov edi,DWORD [24+esp]
+ mov eax,DWORD [28+esp]
+ mov ebx,esp
+ call L$000pic_point
+L$000pic_point:
+ pop ebp
+ lea ebp,[(L$001K256-L$000pic_point)+ebp]
+ sub esp,16
+ and esp,-64
+ shl eax,6
+ add eax,edi
+ mov DWORD [esp],esi
+ mov DWORD [4+esp],edi
+ mov DWORD [8+esp],eax
+ mov DWORD [12+esp],ebx
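+; Probe OPENSSL_ia32cap_P and dispatch: SHA extensions (L$004shaext),
+; SSSE3 (L$005SSSE3), the fully unrolled integer path (L$006unrolled, for
+; inputs of at least four blocks), or the compact loop (L$002loop).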
+ lea edx,[_OPENSSL_ia32cap_P]
+ mov ecx,DWORD [edx]
+ mov ebx,DWORD [4+edx]
+ test ecx,1048576
+ jnz NEAR L$002loop
+ mov edx,DWORD [8+edx]
+ test ecx,16777216
+ jz NEAR L$003no_xmm
+ and ecx,1073741824
+ and ebx,268435968
+ test edx,536870912
+ jnz NEAR L$004shaext
+ or ecx,ebx
+ and ecx,1342177280
+ cmp ecx,1342177280
+ test ebx,512
+ jnz NEAR L$005SSSE3
+L$003no_xmm:
+ sub eax,edi
+ cmp eax,256
+ jae NEAR L$006unrolled
+ jmp NEAR L$002loop
+align 16
+L$002loop:
+ mov eax,DWORD [edi]
+ mov ebx,DWORD [4+edi]
+ mov ecx,DWORD [8+edi]
+ bswap eax
+ mov edx,DWORD [12+edi]
+ bswap ebx
+ push eax
+ bswap ecx
+ push ebx
+ bswap edx
+ push ecx
+ push edx
+ mov eax,DWORD [16+edi]
+ mov ebx,DWORD [20+edi]
+ mov ecx,DWORD [24+edi]
+ bswap eax
+ mov edx,DWORD [28+edi]
+ bswap ebx
+ push eax
+ bswap ecx
+ push ebx
+ bswap edx
+ push ecx
+ push edx
+ mov eax,DWORD [32+edi]
+ mov ebx,DWORD [36+edi]
+ mov ecx,DWORD [40+edi]
+ bswap eax
+ mov edx,DWORD [44+edi]
+ bswap ebx
+ push eax
+ bswap ecx
+ push ebx
+ bswap edx
+ push ecx
+ push edx
+ mov eax,DWORD [48+edi]
+ mov ebx,DWORD [52+edi]
+ mov ecx,DWORD [56+edi]
+ bswap eax
+ mov edx,DWORD [60+edi]
+ bswap ebx
+ push eax
+ bswap ecx
+ push ebx
+ bswap edx
+ push ecx
+ push edx
+ add edi,64
+ lea esp,[esp-36]
+ mov DWORD [104+esp],edi
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edi,DWORD [12+esi]
+ mov DWORD [8+esp],ebx
+ xor ebx,ecx
+ mov DWORD [12+esp],ecx
+ mov DWORD [16+esp],edi
+ mov DWORD [esp],ebx
+ mov edx,DWORD [16+esi]
+ mov ebx,DWORD [20+esi]
+ mov ecx,DWORD [24+esi]
+ mov edi,DWORD [28+esi]
+ mov DWORD [24+esp],ebx
+ mov DWORD [28+esp],ecx
+ mov DWORD [32+esp],edi
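+; Rounds 0..15 of the compact path: one round per iteration, fetching K from
+; [ebp]; the loop exits after K[15] = 0xc19bf174 (3248222580).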
+align 16
+L$00700_15:
+ mov ecx,edx
+ mov esi,DWORD [24+esp]
+ ror ecx,14
+ mov edi,DWORD [28+esp]
+ xor ecx,edx
+ xor esi,edi
+ mov ebx,DWORD [96+esp]
+ ror ecx,5
+ and esi,edx
+ mov DWORD [20+esp],edx
+ xor edx,ecx
+ add ebx,DWORD [32+esp]
+ xor esi,edi
+ ror edx,6
+ mov ecx,eax
+ add ebx,esi
+ ror ecx,9
+ add ebx,edx
+ mov edi,DWORD [8+esp]
+ xor ecx,eax
+ mov DWORD [4+esp],eax
+ lea esp,[esp-4]
+ ror ecx,11
+ mov esi,DWORD [ebp]
+ xor ecx,eax
+ mov edx,DWORD [20+esp]
+ xor eax,edi
+ ror ecx,2
+ add ebx,esi
+ mov DWORD [esp],eax
+ add edx,ebx
+ and eax,DWORD [4+esp]
+ add ebx,ecx
+ xor eax,edi
+ add ebp,4
+ add eax,ebx
+ cmp esi,3248222580
+ jne NEAR L$00700_15
+ mov ecx,DWORD [156+esp]
+ jmp NEAR L$00816_63
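+; Rounds 16..63: the sigma0/sigma1 message-schedule expansion is fused with
+; each round; the loop exits after K[63] = 0xc67178f2 (3329325298).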
+align 16
+L$00816_63:
+ mov ebx,ecx
+ mov esi,DWORD [104+esp]
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [160+esp]
+ shr edi,10
+ add ebx,DWORD [124+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [24+esp]
+ ror ecx,14
+ add ebx,edi
+ mov edi,DWORD [28+esp]
+ xor ecx,edx
+ xor esi,edi
+ mov DWORD [96+esp],ebx
+ ror ecx,5
+ and esi,edx
+ mov DWORD [20+esp],edx
+ xor edx,ecx
+ add ebx,DWORD [32+esp]
+ xor esi,edi
+ ror edx,6
+ mov ecx,eax
+ add ebx,esi
+ ror ecx,9
+ add ebx,edx
+ mov edi,DWORD [8+esp]
+ xor ecx,eax
+ mov DWORD [4+esp],eax
+ lea esp,[esp-4]
+ ror ecx,11
+ mov esi,DWORD [ebp]
+ xor ecx,eax
+ mov edx,DWORD [20+esp]
+ xor eax,edi
+ ror ecx,2
+ add ebx,esi
+ mov DWORD [esp],eax
+ add edx,ebx
+ and eax,DWORD [4+esp]
+ add ebx,ecx
+ xor eax,edi
+ mov ecx,DWORD [156+esp]
+ add ebp,4
+ add eax,ebx
+ cmp esi,3329325298
+ jne NEAR L$00816_63
+ mov esi,DWORD [356+esp]
+ mov ebx,DWORD [8+esp]
+ mov ecx,DWORD [16+esp]
+ add eax,DWORD [esi]
+ add ebx,DWORD [4+esi]
+ add edi,DWORD [8+esi]
+ add ecx,DWORD [12+esi]
+ mov DWORD [esi],eax
+ mov DWORD [4+esi],ebx
+ mov DWORD [8+esi],edi
+ mov DWORD [12+esi],ecx
+ mov eax,DWORD [24+esp]
+ mov ebx,DWORD [28+esp]
+ mov ecx,DWORD [32+esp]
+ mov edi,DWORD [360+esp]
+ add edx,DWORD [16+esi]
+ add eax,DWORD [20+esi]
+ add ebx,DWORD [24+esi]
+ add ecx,DWORD [28+esi]
+ mov DWORD [16+esi],edx
+ mov DWORD [20+esi],eax
+ mov DWORD [24+esi],ebx
+ mov DWORD [28+esi],ecx
+ lea esp,[356+esp]
+ sub ebp,256
+ cmp edi,DWORD [8+esp]
+ jb NEAR L$002loop
+ mov esp,DWORD [12+esp]
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
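+; The 64 SHA-256 round constants K[0..63], a byte-order shuffle mask and the
+; CRYPTOGAMS banner string.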
+align 64
+L$001K256:
+dd 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298
+dd 66051,67438087,134810123,202182159
+db 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
+db 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+db 62,0
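+; Integer-only path with all 64 rounds unrolled; selected when none of the
+; SIMD paths apply and at least 256 bytes (four blocks) of input remain.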
+align 16
+L$006unrolled:
+ lea esp,[esp-96]
+ mov eax,DWORD [esi]
+ mov ebp,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov ebx,DWORD [12+esi]
+ mov DWORD [4+esp],ebp
+ xor ebp,ecx
+ mov DWORD [8+esp],ecx
+ mov DWORD [12+esp],ebx
+ mov edx,DWORD [16+esi]
+ mov ebx,DWORD [20+esi]
+ mov ecx,DWORD [24+esi]
+ mov esi,DWORD [28+esi]
+ mov DWORD [20+esp],ebx
+ mov DWORD [24+esp],ecx
+ mov DWORD [28+esp],esi
+ jmp NEAR L$009grand_loop
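+; Per-block loop: byte-swap the sixteen input words into [32..92+esp], then
+; run the 64 rounds fully unrolled with each K constant folded into a lea.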
+align 16
+L$009grand_loop:
+ mov ebx,DWORD [edi]
+ mov ecx,DWORD [4+edi]
+ bswap ebx
+ mov esi,DWORD [8+edi]
+ bswap ecx
+ mov DWORD [32+esp],ebx
+ bswap esi
+ mov DWORD [36+esp],ecx
+ mov DWORD [40+esp],esi
+ mov ebx,DWORD [12+edi]
+ mov ecx,DWORD [16+edi]
+ bswap ebx
+ mov esi,DWORD [20+edi]
+ bswap ecx
+ mov DWORD [44+esp],ebx
+ bswap esi
+ mov DWORD [48+esp],ecx
+ mov DWORD [52+esp],esi
+ mov ebx,DWORD [24+edi]
+ mov ecx,DWORD [28+edi]
+ bswap ebx
+ mov esi,DWORD [32+edi]
+ bswap ecx
+ mov DWORD [56+esp],ebx
+ bswap esi
+ mov DWORD [60+esp],ecx
+ mov DWORD [64+esp],esi
+ mov ebx,DWORD [36+edi]
+ mov ecx,DWORD [40+edi]
+ bswap ebx
+ mov esi,DWORD [44+edi]
+ bswap ecx
+ mov DWORD [68+esp],ebx
+ bswap esi
+ mov DWORD [72+esp],ecx
+ mov DWORD [76+esp],esi
+ mov ebx,DWORD [48+edi]
+ mov ecx,DWORD [52+edi]
+ bswap ebx
+ mov esi,DWORD [56+edi]
+ bswap ecx
+ mov DWORD [80+esp],ebx
+ bswap esi
+ mov DWORD [84+esp],ecx
+ mov DWORD [88+esp],esi
+ mov ebx,DWORD [60+edi]
+ add edi,64
+ bswap ebx
+ mov DWORD [100+esp],edi
+ mov DWORD [92+esp],ebx
+ mov ecx,edx
+ mov esi,DWORD [20+esp]
+ ror edx,14
+ mov edi,DWORD [24+esp]
+ xor edx,ecx
+ mov ebx,DWORD [32+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [16+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [28+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [4+esp]
+ xor ecx,eax
+ mov DWORD [esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[1116352408+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [12+esp]
+ add ebp,ecx
+ mov esi,edx
+ mov ecx,DWORD [16+esp]
+ ror edx,14
+ mov edi,DWORD [20+esp]
+ xor edx,esi
+ mov ebx,DWORD [36+esp]
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [12+esp],esi
+ xor edx,esi
+ add ebx,DWORD [24+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [esp]
+ xor esi,ebp
+ mov DWORD [28+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[1899447441+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [8+esp]
+ add eax,esi
+ mov ecx,edx
+ mov esi,DWORD [12+esp]
+ ror edx,14
+ mov edi,DWORD [16+esp]
+ xor edx,ecx
+ mov ebx,DWORD [40+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [8+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [20+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [28+esp]
+ xor ecx,eax
+ mov DWORD [24+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[3049323471+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [4+esp]
+ add ebp,ecx
+ mov esi,edx
+ mov ecx,DWORD [8+esp]
+ ror edx,14
+ mov edi,DWORD [12+esp]
+ xor edx,esi
+ mov ebx,DWORD [44+esp]
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [4+esp],esi
+ xor edx,esi
+ add ebx,DWORD [16+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [24+esp]
+ xor esi,ebp
+ mov DWORD [20+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[3921009573+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [esp]
+ add eax,esi
+ mov ecx,edx
+ mov esi,DWORD [4+esp]
+ ror edx,14
+ mov edi,DWORD [8+esp]
+ xor edx,ecx
+ mov ebx,DWORD [48+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [12+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [20+esp]
+ xor ecx,eax
+ mov DWORD [16+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[961987163+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [28+esp]
+ add ebp,ecx
+ mov esi,edx
+ mov ecx,DWORD [esp]
+ ror edx,14
+ mov edi,DWORD [4+esp]
+ xor edx,esi
+ mov ebx,DWORD [52+esp]
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [28+esp],esi
+ xor edx,esi
+ add ebx,DWORD [8+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [16+esp]
+ xor esi,ebp
+ mov DWORD [12+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[1508970993+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [24+esp]
+ add eax,esi
+ mov ecx,edx
+ mov esi,DWORD [28+esp]
+ ror edx,14
+ mov edi,DWORD [esp]
+ xor edx,ecx
+ mov ebx,DWORD [56+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [24+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [4+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [12+esp]
+ xor ecx,eax
+ mov DWORD [8+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[2453635748+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [20+esp]
+ add ebp,ecx
+ mov esi,edx
+ mov ecx,DWORD [24+esp]
+ ror edx,14
+ mov edi,DWORD [28+esp]
+ xor edx,esi
+ mov ebx,DWORD [60+esp]
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [20+esp],esi
+ xor edx,esi
+ add ebx,DWORD [esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [8+esp]
+ xor esi,ebp
+ mov DWORD [4+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[2870763221+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [16+esp]
+ add eax,esi
+ mov ecx,edx
+ mov esi,DWORD [20+esp]
+ ror edx,14
+ mov edi,DWORD [24+esp]
+ xor edx,ecx
+ mov ebx,DWORD [64+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [16+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [28+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [4+esp]
+ xor ecx,eax
+ mov DWORD [esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[3624381080+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [12+esp]
+ add ebp,ecx
+ mov esi,edx
+ mov ecx,DWORD [16+esp]
+ ror edx,14
+ mov edi,DWORD [20+esp]
+ xor edx,esi
+ mov ebx,DWORD [68+esp]
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [12+esp],esi
+ xor edx,esi
+ add ebx,DWORD [24+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [esp]
+ xor esi,ebp
+ mov DWORD [28+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[310598401+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [8+esp]
+ add eax,esi
+ mov ecx,edx
+ mov esi,DWORD [12+esp]
+ ror edx,14
+ mov edi,DWORD [16+esp]
+ xor edx,ecx
+ mov ebx,DWORD [72+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [8+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [20+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [28+esp]
+ xor ecx,eax
+ mov DWORD [24+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[607225278+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [4+esp]
+ add ebp,ecx
+ mov esi,edx
+ mov ecx,DWORD [8+esp]
+ ror edx,14
+ mov edi,DWORD [12+esp]
+ xor edx,esi
+ mov ebx,DWORD [76+esp]
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [4+esp],esi
+ xor edx,esi
+ add ebx,DWORD [16+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [24+esp]
+ xor esi,ebp
+ mov DWORD [20+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[1426881987+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [esp]
+ add eax,esi
+ mov ecx,edx
+ mov esi,DWORD [4+esp]
+ ror edx,14
+ mov edi,DWORD [8+esp]
+ xor edx,ecx
+ mov ebx,DWORD [80+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [12+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [20+esp]
+ xor ecx,eax
+ mov DWORD [16+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[1925078388+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [28+esp]
+ add ebp,ecx
+ mov esi,edx
+ mov ecx,DWORD [esp]
+ ror edx,14
+ mov edi,DWORD [4+esp]
+ xor edx,esi
+ mov ebx,DWORD [84+esp]
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [28+esp],esi
+ xor edx,esi
+ add ebx,DWORD [8+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [16+esp]
+ xor esi,ebp
+ mov DWORD [12+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[2162078206+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [24+esp]
+ add eax,esi
+ mov ecx,edx
+ mov esi,DWORD [28+esp]
+ ror edx,14
+ mov edi,DWORD [esp]
+ xor edx,ecx
+ mov ebx,DWORD [88+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [24+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [4+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [12+esp]
+ xor ecx,eax
+ mov DWORD [8+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[2614888103+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [20+esp]
+ add ebp,ecx
+ mov esi,edx
+ mov ecx,DWORD [24+esp]
+ ror edx,14
+ mov edi,DWORD [28+esp]
+ xor edx,esi
+ mov ebx,DWORD [92+esp]
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [20+esp],esi
+ xor edx,esi
+ add ebx,DWORD [esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [8+esp]
+ xor esi,ebp
+ mov DWORD [4+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[3248222580+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [36+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [16+esp]
+ add eax,esi
+ mov esi,DWORD [88+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [32+esp]
+ shr edi,10
+ add ebx,DWORD [68+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [20+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [24+esp]
+ xor edx,ecx
+ mov DWORD [32+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [16+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [28+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [4+esp]
+ xor ecx,eax
+ mov DWORD [esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[3835390401+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [40+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [12+esp]
+ add ebp,ecx
+ mov ecx,DWORD [92+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [36+esp]
+ shr edi,10
+ add ebx,DWORD [72+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [16+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [20+esp]
+ xor edx,esi
+ mov DWORD [36+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [12+esp],esi
+ xor edx,esi
+ add ebx,DWORD [24+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [esp]
+ xor esi,ebp
+ mov DWORD [28+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[4022224774+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [44+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [8+esp]
+ add eax,esi
+ mov esi,DWORD [32+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [40+esp]
+ shr edi,10
+ add ebx,DWORD [76+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [12+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [16+esp]
+ xor edx,ecx
+ mov DWORD [40+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [8+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [20+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [28+esp]
+ xor ecx,eax
+ mov DWORD [24+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[264347078+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [48+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [4+esp]
+ add ebp,ecx
+ mov ecx,DWORD [36+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [44+esp]
+ shr edi,10
+ add ebx,DWORD [80+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [8+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [12+esp]
+ xor edx,esi
+ mov DWORD [44+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [4+esp],esi
+ xor edx,esi
+ add ebx,DWORD [16+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [24+esp]
+ xor esi,ebp
+ mov DWORD [20+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[604807628+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [52+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [esp]
+ add eax,esi
+ mov esi,DWORD [40+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [48+esp]
+ shr edi,10
+ add ebx,DWORD [84+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [4+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [8+esp]
+ xor edx,ecx
+ mov DWORD [48+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [12+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [20+esp]
+ xor ecx,eax
+ mov DWORD [16+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[770255983+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [56+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [28+esp]
+ add ebp,ecx
+ mov ecx,DWORD [44+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [52+esp]
+ shr edi,10
+ add ebx,DWORD [88+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [4+esp]
+ xor edx,esi
+ mov DWORD [52+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [28+esp],esi
+ xor edx,esi
+ add ebx,DWORD [8+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [16+esp]
+ xor esi,ebp
+ mov DWORD [12+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[1249150122+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [60+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [24+esp]
+ add eax,esi
+ mov esi,DWORD [48+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [56+esp]
+ shr edi,10
+ add ebx,DWORD [92+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [28+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [esp]
+ xor edx,ecx
+ mov DWORD [56+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [24+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [4+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [12+esp]
+ xor ecx,eax
+ mov DWORD [8+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[1555081692+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [64+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [20+esp]
+ add ebp,ecx
+ mov ecx,DWORD [52+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [60+esp]
+ shr edi,10
+ add ebx,DWORD [32+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [24+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [28+esp]
+ xor edx,esi
+ mov DWORD [60+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [20+esp],esi
+ xor edx,esi
+ add ebx,DWORD [esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [8+esp]
+ xor esi,ebp
+ mov DWORD [4+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[1996064986+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [68+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [16+esp]
+ add eax,esi
+ mov esi,DWORD [56+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [64+esp]
+ shr edi,10
+ add ebx,DWORD [36+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [20+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [24+esp]
+ xor edx,ecx
+ mov DWORD [64+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [16+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [28+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [4+esp]
+ xor ecx,eax
+ mov DWORD [esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[2554220882+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [72+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [12+esp]
+ add ebp,ecx
+ mov ecx,DWORD [60+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [68+esp]
+ shr edi,10
+ add ebx,DWORD [40+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [16+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [20+esp]
+ xor edx,esi
+ mov DWORD [68+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [12+esp],esi
+ xor edx,esi
+ add ebx,DWORD [24+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [esp]
+ xor esi,ebp
+ mov DWORD [28+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[2821834349+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [76+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [8+esp]
+ add eax,esi
+ mov esi,DWORD [64+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [72+esp]
+ shr edi,10
+ add ebx,DWORD [44+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [12+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [16+esp]
+ xor edx,ecx
+ mov DWORD [72+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [8+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [20+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [28+esp]
+ xor ecx,eax
+ mov DWORD [24+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[2952996808+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [80+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [4+esp]
+ add ebp,ecx
+ mov ecx,DWORD [68+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [76+esp]
+ shr edi,10
+ add ebx,DWORD [48+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [8+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [12+esp]
+ xor edx,esi
+ mov DWORD [76+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [4+esp],esi
+ xor edx,esi
+ add ebx,DWORD [16+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [24+esp]
+ xor esi,ebp
+ mov DWORD [20+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[3210313671+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [84+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [esp]
+ add eax,esi
+ mov esi,DWORD [72+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [80+esp]
+ shr edi,10
+ add ebx,DWORD [52+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [4+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [8+esp]
+ xor edx,ecx
+ mov DWORD [80+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [12+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [20+esp]
+ xor ecx,eax
+ mov DWORD [16+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[3336571891+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [88+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [28+esp]
+ add ebp,ecx
+ mov ecx,DWORD [76+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [84+esp]
+ shr edi,10
+ add ebx,DWORD [56+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [4+esp]
+ xor edx,esi
+ mov DWORD [84+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [28+esp],esi
+ xor edx,esi
+ add ebx,DWORD [8+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [16+esp]
+ xor esi,ebp
+ mov DWORD [12+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[3584528711+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [92+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [24+esp]
+ add eax,esi
+ mov esi,DWORD [80+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [88+esp]
+ shr edi,10
+ add ebx,DWORD [60+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [28+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [esp]
+ xor edx,ecx
+ mov DWORD [88+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [24+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [4+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [12+esp]
+ xor ecx,eax
+ mov DWORD [8+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[113926993+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [32+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [20+esp]
+ add ebp,ecx
+ mov ecx,DWORD [84+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [92+esp]
+ shr edi,10
+ add ebx,DWORD [64+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [24+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [28+esp]
+ xor edx,esi
+ mov DWORD [92+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [20+esp],esi
+ xor edx,esi
+ add ebx,DWORD [esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [8+esp]
+ xor esi,ebp
+ mov DWORD [4+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[338241895+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [36+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [16+esp]
+ add eax,esi
+ mov esi,DWORD [88+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [32+esp]
+ shr edi,10
+ add ebx,DWORD [68+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [20+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [24+esp]
+ xor edx,ecx
+ mov DWORD [32+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [16+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [28+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [4+esp]
+ xor ecx,eax
+ mov DWORD [esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[666307205+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [40+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [12+esp]
+ add ebp,ecx
+ mov ecx,DWORD [92+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [36+esp]
+ shr edi,10
+ add ebx,DWORD [72+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [16+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [20+esp]
+ xor edx,esi
+ mov DWORD [36+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [12+esp],esi
+ xor edx,esi
+ add ebx,DWORD [24+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [esp]
+ xor esi,ebp
+ mov DWORD [28+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[773529912+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [44+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [8+esp]
+ add eax,esi
+ mov esi,DWORD [32+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [40+esp]
+ shr edi,10
+ add ebx,DWORD [76+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [12+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [16+esp]
+ xor edx,ecx
+ mov DWORD [40+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [8+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [20+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [28+esp]
+ xor ecx,eax
+ mov DWORD [24+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[1294757372+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [48+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [4+esp]
+ add ebp,ecx
+ mov ecx,DWORD [36+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [44+esp]
+ shr edi,10
+ add ebx,DWORD [80+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [8+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [12+esp]
+ xor edx,esi
+ mov DWORD [44+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [4+esp],esi
+ xor edx,esi
+ add ebx,DWORD [16+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [24+esp]
+ xor esi,ebp
+ mov DWORD [20+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[1396182291+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [52+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [esp]
+ add eax,esi
+ mov esi,DWORD [40+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [48+esp]
+ shr edi,10
+ add ebx,DWORD [84+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [4+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [8+esp]
+ xor edx,ecx
+ mov DWORD [48+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [12+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [20+esp]
+ xor ecx,eax
+ mov DWORD [16+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[1695183700+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [56+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [28+esp]
+ add ebp,ecx
+ mov ecx,DWORD [44+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [52+esp]
+ shr edi,10
+ add ebx,DWORD [88+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [4+esp]
+ xor edx,esi
+ mov DWORD [52+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [28+esp],esi
+ xor edx,esi
+ add ebx,DWORD [8+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [16+esp]
+ xor esi,ebp
+ mov DWORD [12+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[1986661051+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [60+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [24+esp]
+ add eax,esi
+ mov esi,DWORD [48+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [56+esp]
+ shr edi,10
+ add ebx,DWORD [92+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [28+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [esp]
+ xor edx,ecx
+ mov DWORD [56+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [24+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [4+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [12+esp]
+ xor ecx,eax
+ mov DWORD [8+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[2177026350+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [64+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [20+esp]
+ add ebp,ecx
+ mov ecx,DWORD [52+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [60+esp]
+ shr edi,10
+ add ebx,DWORD [32+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [24+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [28+esp]
+ xor edx,esi
+ mov DWORD [60+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [20+esp],esi
+ xor edx,esi
+ add ebx,DWORD [esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [8+esp]
+ xor esi,ebp
+ mov DWORD [4+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[2456956037+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [68+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [16+esp]
+ add eax,esi
+ mov esi,DWORD [56+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [64+esp]
+ shr edi,10
+ add ebx,DWORD [36+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [20+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [24+esp]
+ xor edx,ecx
+ mov DWORD [64+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [16+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [28+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [4+esp]
+ xor ecx,eax
+ mov DWORD [esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[2730485921+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [72+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [12+esp]
+ add ebp,ecx
+ mov ecx,DWORD [60+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [68+esp]
+ shr edi,10
+ add ebx,DWORD [40+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [16+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [20+esp]
+ xor edx,esi
+ mov DWORD [68+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [12+esp],esi
+ xor edx,esi
+ add ebx,DWORD [24+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [esp]
+ xor esi,ebp
+ mov DWORD [28+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[2820302411+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [76+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [8+esp]
+ add eax,esi
+ mov esi,DWORD [64+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [72+esp]
+ shr edi,10
+ add ebx,DWORD [44+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [12+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [16+esp]
+ xor edx,ecx
+ mov DWORD [72+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [8+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [20+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [28+esp]
+ xor ecx,eax
+ mov DWORD [24+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[3259730800+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [80+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [4+esp]
+ add ebp,ecx
+ mov ecx,DWORD [68+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [76+esp]
+ shr edi,10
+ add ebx,DWORD [48+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [8+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [12+esp]
+ xor edx,esi
+ mov DWORD [76+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [4+esp],esi
+ xor edx,esi
+ add ebx,DWORD [16+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [24+esp]
+ xor esi,ebp
+ mov DWORD [20+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[3345764771+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [84+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [esp]
+ add eax,esi
+ mov esi,DWORD [72+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [80+esp]
+ shr edi,10
+ add ebx,DWORD [52+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [4+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [8+esp]
+ xor edx,ecx
+ mov DWORD [80+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [12+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [20+esp]
+ xor ecx,eax
+ mov DWORD [16+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[3516065817+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [88+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [28+esp]
+ add ebp,ecx
+ mov ecx,DWORD [76+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [84+esp]
+ shr edi,10
+ add ebx,DWORD [56+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [4+esp]
+ xor edx,esi
+ mov DWORD [84+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [28+esp],esi
+ xor edx,esi
+ add ebx,DWORD [8+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [16+esp]
+ xor esi,ebp
+ mov DWORD [12+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[3600352804+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [92+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [24+esp]
+ add eax,esi
+ mov esi,DWORD [80+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [88+esp]
+ shr edi,10
+ add ebx,DWORD [60+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [28+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [esp]
+ xor edx,ecx
+ mov DWORD [88+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [24+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [4+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [12+esp]
+ xor ecx,eax
+ mov DWORD [8+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[4094571909+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [32+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [20+esp]
+ add ebp,ecx
+ mov ecx,DWORD [84+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [92+esp]
+ shr edi,10
+ add ebx,DWORD [64+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [24+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [28+esp]
+ xor edx,esi
+ mov DWORD [92+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [20+esp],esi
+ xor edx,esi
+ add ebx,DWORD [esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [8+esp]
+ xor esi,ebp
+ mov DWORD [4+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[275423344+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [36+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [16+esp]
+ add eax,esi
+ mov esi,DWORD [88+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [32+esp]
+ shr edi,10
+ add ebx,DWORD [68+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [20+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [24+esp]
+ xor edx,ecx
+ mov DWORD [32+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [16+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [28+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [4+esp]
+ xor ecx,eax
+ mov DWORD [esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[430227734+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [40+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [12+esp]
+ add ebp,ecx
+ mov ecx,DWORD [92+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [36+esp]
+ shr edi,10
+ add ebx,DWORD [72+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [16+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [20+esp]
+ xor edx,esi
+ mov DWORD [36+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [12+esp],esi
+ xor edx,esi
+ add ebx,DWORD [24+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [esp]
+ xor esi,ebp
+ mov DWORD [28+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[506948616+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [44+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [8+esp]
+ add eax,esi
+ mov esi,DWORD [32+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [40+esp]
+ shr edi,10
+ add ebx,DWORD [76+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [12+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [16+esp]
+ xor edx,ecx
+ mov DWORD [40+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [8+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [20+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [28+esp]
+ xor ecx,eax
+ mov DWORD [24+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[659060556+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [48+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [4+esp]
+ add ebp,ecx
+ mov ecx,DWORD [36+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [44+esp]
+ shr edi,10
+ add ebx,DWORD [80+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [8+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [12+esp]
+ xor edx,esi
+ mov DWORD [44+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [4+esp],esi
+ xor edx,esi
+ add ebx,DWORD [16+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [24+esp]
+ xor esi,ebp
+ mov DWORD [20+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[883997877+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [52+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [esp]
+ add eax,esi
+ mov esi,DWORD [40+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [48+esp]
+ shr edi,10
+ add ebx,DWORD [84+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [4+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [8+esp]
+ xor edx,ecx
+ mov DWORD [48+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [12+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [20+esp]
+ xor ecx,eax
+ mov DWORD [16+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[958139571+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [56+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [28+esp]
+ add ebp,ecx
+ mov ecx,DWORD [44+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [52+esp]
+ shr edi,10
+ add ebx,DWORD [88+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [4+esp]
+ xor edx,esi
+ mov DWORD [52+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [28+esp],esi
+ xor edx,esi
+ add ebx,DWORD [8+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [16+esp]
+ xor esi,ebp
+ mov DWORD [12+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[1322822218+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [60+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [24+esp]
+ add eax,esi
+ mov esi,DWORD [48+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [56+esp]
+ shr edi,10
+ add ebx,DWORD [92+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [28+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [esp]
+ xor edx,ecx
+ mov DWORD [56+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [24+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [4+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [12+esp]
+ xor ecx,eax
+ mov DWORD [8+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[1537002063+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [64+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [20+esp]
+ add ebp,ecx
+ mov ecx,DWORD [52+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [60+esp]
+ shr edi,10
+ add ebx,DWORD [32+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [24+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [28+esp]
+ xor edx,esi
+ mov DWORD [60+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [20+esp],esi
+ xor edx,esi
+ add ebx,DWORD [esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [8+esp]
+ xor esi,ebp
+ mov DWORD [4+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[1747873779+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [68+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [16+esp]
+ add eax,esi
+ mov esi,DWORD [56+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [64+esp]
+ shr edi,10
+ add ebx,DWORD [36+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [20+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [24+esp]
+ xor edx,ecx
+ mov DWORD [64+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [16+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [28+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [4+esp]
+ xor ecx,eax
+ mov DWORD [esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[1955562222+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [72+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [12+esp]
+ add ebp,ecx
+ mov ecx,DWORD [60+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [68+esp]
+ shr edi,10
+ add ebx,DWORD [40+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [16+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [20+esp]
+ xor edx,esi
+ mov DWORD [68+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [12+esp],esi
+ xor edx,esi
+ add ebx,DWORD [24+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [esp]
+ xor esi,ebp
+ mov DWORD [28+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[2024104815+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [76+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [8+esp]
+ add eax,esi
+ mov esi,DWORD [64+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [72+esp]
+ shr edi,10
+ add ebx,DWORD [44+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [12+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [16+esp]
+ xor edx,ecx
+ mov DWORD [72+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [8+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [20+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [28+esp]
+ xor ecx,eax
+ mov DWORD [24+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[2227730452+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [80+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [4+esp]
+ add ebp,ecx
+ mov ecx,DWORD [68+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [76+esp]
+ shr edi,10
+ add ebx,DWORD [48+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [8+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [12+esp]
+ xor edx,esi
+ mov DWORD [76+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [4+esp],esi
+ xor edx,esi
+ add ebx,DWORD [16+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [24+esp]
+ xor esi,ebp
+ mov DWORD [20+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[2361852424+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [84+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [esp]
+ add eax,esi
+ mov esi,DWORD [72+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [80+esp]
+ shr edi,10
+ add ebx,DWORD [52+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [4+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [8+esp]
+ xor edx,ecx
+ mov DWORD [80+esp],ebx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [12+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [20+esp]
+ xor ecx,eax
+ mov DWORD [16+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[2428436474+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [88+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [28+esp]
+ add ebp,ecx
+ mov ecx,DWORD [76+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [84+esp]
+ shr edi,10
+ add ebx,DWORD [56+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [4+esp]
+ xor edx,esi
+ mov DWORD [84+esp],ebx
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [28+esp],esi
+ xor edx,esi
+ add ebx,DWORD [8+esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [16+esp]
+ xor esi,ebp
+ mov DWORD [12+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[2756734187+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ mov ecx,DWORD [92+esp]
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [24+esp]
+ add eax,esi
+ mov esi,DWORD [80+esp]
+ mov ebx,ecx
+ ror ecx,11
+ mov edi,esi
+ ror esi,2
+ xor ecx,ebx
+ shr ebx,3
+ ror ecx,7
+ xor esi,edi
+ xor ebx,ecx
+ ror esi,17
+ add ebx,DWORD [88+esp]
+ shr edi,10
+ add ebx,DWORD [60+esp]
+ mov ecx,edx
+ xor edi,esi
+ mov esi,DWORD [28+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [esp]
+ xor edx,ecx
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [24+esp],ecx
+ xor edx,ecx
+ add ebx,DWORD [4+esp]
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add ebx,edi
+ ror ecx,9
+ mov esi,eax
+ mov edi,DWORD [12+esp]
+ xor ecx,eax
+ mov DWORD [8+esp],eax
+ xor eax,edi
+ ror ecx,11
+ and ebp,eax
+ lea edx,[3204031479+edx*1+ebx]
+ xor ecx,esi
+ xor ebp,edi
+ mov esi,DWORD [32+esp]
+ ror ecx,2
+ add ebp,edx
+ add edx,DWORD [20+esp]
+ add ebp,ecx
+ mov ecx,DWORD [84+esp]
+ mov ebx,esi
+ ror esi,11
+ mov edi,ecx
+ ror ecx,2
+ xor esi,ebx
+ shr ebx,3
+ ror esi,7
+ xor ecx,edi
+ xor ebx,esi
+ ror ecx,17
+ add ebx,DWORD [92+esp]
+ shr edi,10
+ add ebx,DWORD [64+esp]
+ mov esi,edx
+ xor edi,ecx
+ mov ecx,DWORD [24+esp]
+ ror edx,14
+ add ebx,edi
+ mov edi,DWORD [28+esp]
+ xor edx,esi
+ xor ecx,edi
+ ror edx,5
+ and ecx,esi
+ mov DWORD [20+esp],esi
+ xor edx,esi
+ add ebx,DWORD [esp]
+ xor edi,ecx
+ ror edx,6
+ mov esi,ebp
+ add ebx,edi
+ ror esi,9
+ mov ecx,ebp
+ mov edi,DWORD [8+esp]
+ xor esi,ebp
+ mov DWORD [4+esp],ebp
+ xor ebp,edi
+ ror esi,11
+ and eax,ebp
+ lea edx,[3329325298+edx*1+ebx]
+ xor esi,ecx
+ xor eax,edi
+ ror esi,2
+ add eax,edx
+ add edx,DWORD [16+esp]
+ add eax,esi
+ mov esi,DWORD [96+esp]
+ xor ebp,edi
+ mov ecx,DWORD [12+esp]
+ add eax,DWORD [esi]
+ add ebp,DWORD [4+esi]
+ add edi,DWORD [8+esi]
+ add ecx,DWORD [12+esi]
+ mov DWORD [esi],eax
+ mov DWORD [4+esi],ebp
+ mov DWORD [8+esi],edi
+ mov DWORD [12+esi],ecx
+ mov DWORD [4+esp],ebp
+ xor ebp,edi
+ mov DWORD [8+esp],edi
+ mov DWORD [12+esp],ecx
+ mov edi,DWORD [20+esp]
+ mov ebx,DWORD [24+esp]
+ mov ecx,DWORD [28+esp]
+ add edx,DWORD [16+esi]
+ add edi,DWORD [20+esi]
+ add ebx,DWORD [24+esi]
+ add ecx,DWORD [28+esi]
+ mov DWORD [16+esi],edx
+ mov DWORD [20+esi],edi
+ mov DWORD [24+esi],ebx
+ mov DWORD [28+esi],ecx
+ mov DWORD [20+esp],edi
+ mov edi,DWORD [100+esp]
+ mov DWORD [24+esp],ebx
+ mov DWORD [28+esp],ecx
+ cmp edi,DWORD [104+esp]
+ jb NEAR L$009grand_loop
+ mov esp,DWORD [108+esp]
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+align 32
+L$004shaext:
+ sub esp,32
+ movdqu xmm1,[esi]
+ lea ebp,[128+ebp]
+ movdqu xmm2,[16+esi]
+ movdqa xmm7,[128+ebp]
+ pshufd xmm0,xmm1,27
+ pshufd xmm1,xmm1,177
+ pshufd xmm2,xmm2,27
+db 102,15,58,15,202,8
+ punpcklqdq xmm2,xmm0
+ jmp NEAR L$010loop_shaext
+align 16
+L$010loop_shaext:
+ movdqu xmm3,[edi]
+ movdqu xmm4,[16+edi]
+ movdqu xmm5,[32+edi]
+db 102,15,56,0,223
+ movdqu xmm6,[48+edi]
+ movdqa [16+esp],xmm2
+ movdqa xmm0,[ebp-128]
+ paddd xmm0,xmm3
+db 102,15,56,0,231
+db 15,56,203,209
+ pshufd xmm0,xmm0,14
+ nop
+ movdqa [esp],xmm1
+db 15,56,203,202
+ movdqa xmm0,[ebp-112]
+ paddd xmm0,xmm4
+db 102,15,56,0,239
+db 15,56,203,209
+ pshufd xmm0,xmm0,14
+ lea edi,[64+edi]
+db 15,56,204,220
+db 15,56,203,202
+ movdqa xmm0,[ebp-96]
+ paddd xmm0,xmm5
+db 102,15,56,0,247
+db 15,56,203,209
+ pshufd xmm0,xmm0,14
+ movdqa xmm7,xmm6
+db 102,15,58,15,253,4
+ nop
+ paddd xmm3,xmm7
+db 15,56,204,229
+db 15,56,203,202
+ movdqa xmm0,[ebp-80]
+ paddd xmm0,xmm6
+db 15,56,205,222
+db 15,56,203,209
+ pshufd xmm0,xmm0,14
+ movdqa xmm7,xmm3
+db 102,15,58,15,254,4
+ nop
+ paddd xmm4,xmm7
+db 15,56,204,238
+db 15,56,203,202
+ movdqa xmm0,[ebp-64]
+ paddd xmm0,xmm3
+db 15,56,205,227
+db 15,56,203,209
+ pshufd xmm0,xmm0,14
+ movdqa xmm7,xmm4
+db 102,15,58,15,251,4
+ nop
+ paddd xmm5,xmm7
+db 15,56,204,243
+db 15,56,203,202
+ movdqa xmm0,[ebp-48]
+ paddd xmm0,xmm4
+db 15,56,205,236
+db 15,56,203,209
+ pshufd xmm0,xmm0,14
+ movdqa xmm7,xmm5
+db 102,15,58,15,252,4
+ nop
+ paddd xmm6,xmm7
+db 15,56,204,220
+db 15,56,203,202
+ movdqa xmm0,[ebp-32]
+ paddd xmm0,xmm5
+db 15,56,205,245
+db 15,56,203,209
+ pshufd xmm0,xmm0,14
+ movdqa xmm7,xmm6
+db 102,15,58,15,253,4
+ nop
+ paddd xmm3,xmm7
+db 15,56,204,229
+db 15,56,203,202
+ movdqa xmm0,[ebp-16]
+ paddd xmm0,xmm6
+db 15,56,205,222
+db 15,56,203,209
+ pshufd xmm0,xmm0,14
+ movdqa xmm7,xmm3
+db 102,15,58,15,254,4
+ nop
+ paddd xmm4,xmm7
+db 15,56,204,238
+db 15,56,203,202
+ movdqa xmm0,[ebp]
+ paddd xmm0,xmm3
+db 15,56,205,227
+db 15,56,203,209
+ pshufd xmm0,xmm0,14
+ movdqa xmm7,xmm4
+db 102,15,58,15,251,4
+ nop
+ paddd xmm5,xmm7
+db 15,56,204,243
+db 15,56,203,202
+ movdqa xmm0,[16+ebp]
+ paddd xmm0,xmm4
+db 15,56,205,236
+db 15,56,203,209
+ pshufd xmm0,xmm0,14
+ movdqa xmm7,xmm5
+db 102,15,58,15,252,4
+ nop
+ paddd xmm6,xmm7
+db 15,56,204,220
+db 15,56,203,202
+ movdqa xmm0,[32+ebp]
+ paddd xmm0,xmm5
+db 15,56,205,245
+db 15,56,203,209
+ pshufd xmm0,xmm0,14
+ movdqa xmm7,xmm6
+db 102,15,58,15,253,4
+ nop
+ paddd xmm3,xmm7
+db 15,56,204,229
+db 15,56,203,202
+ movdqa xmm0,[48+ebp]
+ paddd xmm0,xmm6
+db 15,56,205,222
+db 15,56,203,209
+ pshufd xmm0,xmm0,14
+ movdqa xmm7,xmm3
+db 102,15,58,15,254,4
+ nop
+ paddd xmm4,xmm7
+db 15,56,204,238
+db 15,56,203,202
+ movdqa xmm0,[64+ebp]
+ paddd xmm0,xmm3
+db 15,56,205,227
+db 15,56,203,209
+ pshufd xmm0,xmm0,14
+ movdqa xmm7,xmm4
+db 102,15,58,15,251,4
+ nop
+ paddd xmm5,xmm7
+db 15,56,204,243
+db 15,56,203,202
+ movdqa xmm0,[80+ebp]
+ paddd xmm0,xmm4
+db 15,56,205,236
+db 15,56,203,209
+ pshufd xmm0,xmm0,14
+ movdqa xmm7,xmm5
+db 102,15,58,15,252,4
+db 15,56,203,202
+ paddd xmm6,xmm7
+ movdqa xmm0,[96+ebp]
+ paddd xmm0,xmm5
+db 15,56,203,209
+ pshufd xmm0,xmm0,14
+db 15,56,205,245
+ movdqa xmm7,[128+ebp]
+db 15,56,203,202
+ movdqa xmm0,[112+ebp]
+ paddd xmm0,xmm6
+ nop
+db 15,56,203,209
+ pshufd xmm0,xmm0,14
+ cmp eax,edi
+ nop
+db 15,56,203,202
+ paddd xmm2,[16+esp]
+ paddd xmm1,[esp]
+ jnz NEAR L$010loop_shaext
+ pshufd xmm2,xmm2,177
+ pshufd xmm7,xmm1,27
+ pshufd xmm1,xmm1,177
+ punpckhqdq xmm1,xmm2
+db 102,15,58,15,215,8
+ mov esp,DWORD [44+esp]
+ movdqu [esi],xmm1
+ movdqu [16+esi],xmm2
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+align 32
+L$005SSSE3:
+ lea esp,[esp-96]
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edi,DWORD [12+esi]
+ mov DWORD [4+esp],ebx
+ xor ebx,ecx
+ mov DWORD [8+esp],ecx
+ mov DWORD [12+esp],edi
+ mov edx,DWORD [16+esi]
+ mov edi,DWORD [20+esi]
+ mov ecx,DWORD [24+esi]
+ mov esi,DWORD [28+esi]
+ mov DWORD [20+esp],edi
+ mov edi,DWORD [100+esp]
+ mov DWORD [24+esp],ecx
+ mov DWORD [28+esp],esi
+ movdqa xmm7,[256+ebp]
+ jmp NEAR L$011grand_ssse3
+align 16
+L$011grand_ssse3:
+ movdqu xmm0,[edi]
+ movdqu xmm1,[16+edi]
+ movdqu xmm2,[32+edi]
+ movdqu xmm3,[48+edi]
+ add edi,64
+db 102,15,56,0,199
+ mov DWORD [100+esp],edi
+db 102,15,56,0,207
+ movdqa xmm4,[ebp]
+db 102,15,56,0,215
+ movdqa xmm5,[16+ebp]
+ paddd xmm4,xmm0
+db 102,15,56,0,223
+ movdqa xmm6,[32+ebp]
+ paddd xmm5,xmm1
+ movdqa xmm7,[48+ebp]
+ movdqa [32+esp],xmm4
+ paddd xmm6,xmm2
+ movdqa [48+esp],xmm5
+ paddd xmm7,xmm3
+ movdqa [64+esp],xmm6
+ movdqa [80+esp],xmm7
+ jmp NEAR L$012ssse3_00_47
+align 16
+L$012ssse3_00_47:
+ add ebp,64
+ mov ecx,edx
+ movdqa xmm4,xmm1
+ ror edx,14
+ mov esi,DWORD [20+esp]
+ movdqa xmm7,xmm3
+ xor edx,ecx
+ mov edi,DWORD [24+esp]
+db 102,15,58,15,224,4
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+db 102,15,58,15,250,4
+ mov DWORD [16+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ movdqa xmm5,xmm4
+ ror edx,6
+ mov ecx,eax
+ movdqa xmm6,xmm4
+ add edx,edi
+ mov edi,DWORD [4+esp]
+ psrld xmm4,3
+ mov esi,eax
+ ror ecx,9
+ paddd xmm0,xmm7
+ mov DWORD [esp],eax
+ xor ecx,eax
+ psrld xmm6,7
+ xor eax,edi
+ add edx,DWORD [28+esp]
+ ror ecx,11
+ and ebx,eax
+ pshufd xmm7,xmm3,250
+ xor ecx,esi
+ add edx,DWORD [32+esp]
+ pslld xmm5,14
+ xor ebx,edi
+ ror ecx,2
+ pxor xmm4,xmm6
+ add ebx,edx
+ add edx,DWORD [12+esp]
+ psrld xmm6,11
+ add ebx,ecx
+ mov ecx,edx
+ ror edx,14
+ pxor xmm4,xmm5
+ mov esi,DWORD [16+esp]
+ xor edx,ecx
+ pslld xmm5,11
+ mov edi,DWORD [20+esp]
+ xor esi,edi
+ ror edx,5
+ pxor xmm4,xmm6
+ and esi,ecx
+ mov DWORD [12+esp],ecx
+ movdqa xmm6,xmm7
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ pxor xmm4,xmm5
+ mov ecx,ebx
+ add edx,edi
+ psrld xmm7,10
+ mov edi,DWORD [esp]
+ mov esi,ebx
+ ror ecx,9
+ paddd xmm0,xmm4
+ mov DWORD [28+esp],ebx
+ xor ecx,ebx
+ psrlq xmm6,17
+ xor ebx,edi
+ add edx,DWORD [24+esp]
+ ror ecx,11
+ pxor xmm7,xmm6
+ and eax,ebx
+ xor ecx,esi
+ psrlq xmm6,2
+ add edx,DWORD [36+esp]
+ xor eax,edi
+ ror ecx,2
+ pxor xmm7,xmm6
+ add eax,edx
+ add edx,DWORD [8+esp]
+ pshufd xmm7,xmm7,128
+ add eax,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [12+esp]
+ xor edx,ecx
+ mov edi,DWORD [16+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ psrldq xmm7,8
+ mov DWORD [8+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ paddd xmm0,xmm7
+ ror edx,6
+ mov ecx,eax
+ add edx,edi
+ mov edi,DWORD [28+esp]
+ mov esi,eax
+ ror ecx,9
+ mov DWORD [24+esp],eax
+ pshufd xmm7,xmm0,80
+ xor ecx,eax
+ xor eax,edi
+ add edx,DWORD [20+esp]
+ movdqa xmm6,xmm7
+ ror ecx,11
+ psrld xmm7,10
+ and ebx,eax
+ psrlq xmm6,17
+ xor ecx,esi
+ add edx,DWORD [40+esp]
+ xor ebx,edi
+ ror ecx,2
+ pxor xmm7,xmm6
+ add ebx,edx
+ add edx,DWORD [4+esp]
+ psrlq xmm6,2
+ add ebx,ecx
+ mov ecx,edx
+ ror edx,14
+ pxor xmm7,xmm6
+ mov esi,DWORD [8+esp]
+ xor edx,ecx
+ mov edi,DWORD [12+esp]
+ pshufd xmm7,xmm7,8
+ xor esi,edi
+ ror edx,5
+ movdqa xmm6,[ebp]
+ and esi,ecx
+ mov DWORD [4+esp],ecx
+ pslldq xmm7,8
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,ebx
+ add edx,edi
+ mov edi,DWORD [24+esp]
+ mov esi,ebx
+ ror ecx,9
+ paddd xmm0,xmm7
+ mov DWORD [20+esp],ebx
+ xor ecx,ebx
+ xor ebx,edi
+ add edx,DWORD [16+esp]
+ paddd xmm6,xmm0
+ ror ecx,11
+ and eax,ebx
+ xor ecx,esi
+ add edx,DWORD [44+esp]
+ xor eax,edi
+ ror ecx,2
+ add eax,edx
+ add edx,DWORD [esp]
+ add eax,ecx
+ movdqa [32+esp],xmm6
+ mov ecx,edx
+ movdqa xmm4,xmm2
+ ror edx,14
+ mov esi,DWORD [4+esp]
+ movdqa xmm7,xmm0
+ xor edx,ecx
+ mov edi,DWORD [8+esp]
+db 102,15,58,15,225,4
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+db 102,15,58,15,251,4
+ mov DWORD [esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ movdqa xmm5,xmm4
+ ror edx,6
+ mov ecx,eax
+ movdqa xmm6,xmm4
+ add edx,edi
+ mov edi,DWORD [20+esp]
+ psrld xmm4,3
+ mov esi,eax
+ ror ecx,9
+ paddd xmm1,xmm7
+ mov DWORD [16+esp],eax
+ xor ecx,eax
+ psrld xmm6,7
+ xor eax,edi
+ add edx,DWORD [12+esp]
+ ror ecx,11
+ and ebx,eax
+ pshufd xmm7,xmm0,250
+ xor ecx,esi
+ add edx,DWORD [48+esp]
+ pslld xmm5,14
+ xor ebx,edi
+ ror ecx,2
+ pxor xmm4,xmm6
+ add ebx,edx
+ add edx,DWORD [28+esp]
+ psrld xmm6,11
+ add ebx,ecx
+ mov ecx,edx
+ ror edx,14
+ pxor xmm4,xmm5
+ mov esi,DWORD [esp]
+ xor edx,ecx
+ pslld xmm5,11
+ mov edi,DWORD [4+esp]
+ xor esi,edi
+ ror edx,5
+ pxor xmm4,xmm6
+ and esi,ecx
+ mov DWORD [28+esp],ecx
+ movdqa xmm6,xmm7
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ pxor xmm4,xmm5
+ mov ecx,ebx
+ add edx,edi
+ psrld xmm7,10
+ mov edi,DWORD [16+esp]
+ mov esi,ebx
+ ror ecx,9
+ paddd xmm1,xmm4
+ mov DWORD [12+esp],ebx
+ xor ecx,ebx
+ psrlq xmm6,17
+ xor ebx,edi
+ add edx,DWORD [8+esp]
+ ror ecx,11
+ pxor xmm7,xmm6
+ and eax,ebx
+ xor ecx,esi
+ psrlq xmm6,2
+ add edx,DWORD [52+esp]
+ xor eax,edi
+ ror ecx,2
+ pxor xmm7,xmm6
+ add eax,edx
+ add edx,DWORD [24+esp]
+ pshufd xmm7,xmm7,128
+ add eax,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [28+esp]
+ xor edx,ecx
+ mov edi,DWORD [esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ psrldq xmm7,8
+ mov DWORD [24+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ paddd xmm1,xmm7
+ ror edx,6
+ mov ecx,eax
+ add edx,edi
+ mov edi,DWORD [12+esp]
+ mov esi,eax
+ ror ecx,9
+ mov DWORD [8+esp],eax
+ pshufd xmm7,xmm1,80
+ xor ecx,eax
+ xor eax,edi
+ add edx,DWORD [4+esp]
+ movdqa xmm6,xmm7
+ ror ecx,11
+ psrld xmm7,10
+ and ebx,eax
+ psrlq xmm6,17
+ xor ecx,esi
+ add edx,DWORD [56+esp]
+ xor ebx,edi
+ ror ecx,2
+ pxor xmm7,xmm6
+ add ebx,edx
+ add edx,DWORD [20+esp]
+ psrlq xmm6,2
+ add ebx,ecx
+ mov ecx,edx
+ ror edx,14
+ pxor xmm7,xmm6
+ mov esi,DWORD [24+esp]
+ xor edx,ecx
+ mov edi,DWORD [28+esp]
+ pshufd xmm7,xmm7,8
+ xor esi,edi
+ ror edx,5
+ movdqa xmm6,[16+ebp]
+ and esi,ecx
+ mov DWORD [20+esp],ecx
+ pslldq xmm7,8
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,ebx
+ add edx,edi
+ mov edi,DWORD [8+esp]
+ mov esi,ebx
+ ror ecx,9
+ paddd xmm1,xmm7
+ mov DWORD [4+esp],ebx
+ xor ecx,ebx
+ xor ebx,edi
+ add edx,DWORD [esp]
+ paddd xmm6,xmm1
+ ror ecx,11
+ and eax,ebx
+ xor ecx,esi
+ add edx,DWORD [60+esp]
+ xor eax,edi
+ ror ecx,2
+ add eax,edx
+ add edx,DWORD [16+esp]
+ add eax,ecx
+ movdqa [48+esp],xmm6
+ mov ecx,edx
+ movdqa xmm4,xmm3
+ ror edx,14
+ mov esi,DWORD [20+esp]
+ movdqa xmm7,xmm1
+ xor edx,ecx
+ mov edi,DWORD [24+esp]
+db 102,15,58,15,226,4
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+db 102,15,58,15,248,4
+ mov DWORD [16+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ movdqa xmm5,xmm4
+ ror edx,6
+ mov ecx,eax
+ movdqa xmm6,xmm4
+ add edx,edi
+ mov edi,DWORD [4+esp]
+ psrld xmm4,3
+ mov esi,eax
+ ror ecx,9
+ paddd xmm2,xmm7
+ mov DWORD [esp],eax
+ xor ecx,eax
+ psrld xmm6,7
+ xor eax,edi
+ add edx,DWORD [28+esp]
+ ror ecx,11
+ and ebx,eax
+ pshufd xmm7,xmm1,250
+ xor ecx,esi
+ add edx,DWORD [64+esp]
+ pslld xmm5,14
+ xor ebx,edi
+ ror ecx,2
+ pxor xmm4,xmm6
+ add ebx,edx
+ add edx,DWORD [12+esp]
+ psrld xmm6,11
+ add ebx,ecx
+ mov ecx,edx
+ ror edx,14
+ pxor xmm4,xmm5
+ mov esi,DWORD [16+esp]
+ xor edx,ecx
+ pslld xmm5,11
+ mov edi,DWORD [20+esp]
+ xor esi,edi
+ ror edx,5
+ pxor xmm4,xmm6
+ and esi,ecx
+ mov DWORD [12+esp],ecx
+ movdqa xmm6,xmm7
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ pxor xmm4,xmm5
+ mov ecx,ebx
+ add edx,edi
+ psrld xmm7,10
+ mov edi,DWORD [esp]
+ mov esi,ebx
+ ror ecx,9
+ paddd xmm2,xmm4
+ mov DWORD [28+esp],ebx
+ xor ecx,ebx
+ psrlq xmm6,17
+ xor ebx,edi
+ add edx,DWORD [24+esp]
+ ror ecx,11
+ pxor xmm7,xmm6
+ and eax,ebx
+ xor ecx,esi
+ psrlq xmm6,2
+ add edx,DWORD [68+esp]
+ xor eax,edi
+ ror ecx,2
+ pxor xmm7,xmm6
+ add eax,edx
+ add edx,DWORD [8+esp]
+ pshufd xmm7,xmm7,128
+ add eax,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [12+esp]
+ xor edx,ecx
+ mov edi,DWORD [16+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ psrldq xmm7,8
+ mov DWORD [8+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ paddd xmm2,xmm7
+ ror edx,6
+ mov ecx,eax
+ add edx,edi
+ mov edi,DWORD [28+esp]
+ mov esi,eax
+ ror ecx,9
+ mov DWORD [24+esp],eax
+ pshufd xmm7,xmm2,80
+ xor ecx,eax
+ xor eax,edi
+ add edx,DWORD [20+esp]
+ movdqa xmm6,xmm7
+ ror ecx,11
+ psrld xmm7,10
+ and ebx,eax
+ psrlq xmm6,17
+ xor ecx,esi
+ add edx,DWORD [72+esp]
+ xor ebx,edi
+ ror ecx,2
+ pxor xmm7,xmm6
+ add ebx,edx
+ add edx,DWORD [4+esp]
+ psrlq xmm6,2
+ add ebx,ecx
+ mov ecx,edx
+ ror edx,14
+ pxor xmm7,xmm6
+ mov esi,DWORD [8+esp]
+ xor edx,ecx
+ mov edi,DWORD [12+esp]
+ pshufd xmm7,xmm7,8
+ xor esi,edi
+ ror edx,5
+ movdqa xmm6,[32+ebp]
+ and esi,ecx
+ mov DWORD [4+esp],ecx
+ pslldq xmm7,8
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,ebx
+ add edx,edi
+ mov edi,DWORD [24+esp]
+ mov esi,ebx
+ ror ecx,9
+ paddd xmm2,xmm7
+ mov DWORD [20+esp],ebx
+ xor ecx,ebx
+ xor ebx,edi
+ add edx,DWORD [16+esp]
+ paddd xmm6,xmm2
+ ror ecx,11
+ and eax,ebx
+ xor ecx,esi
+ add edx,DWORD [76+esp]
+ xor eax,edi
+ ror ecx,2
+ add eax,edx
+ add edx,DWORD [esp]
+ add eax,ecx
+ movdqa [64+esp],xmm6
+ mov ecx,edx
+ movdqa xmm4,xmm0
+ ror edx,14
+ mov esi,DWORD [4+esp]
+ movdqa xmm7,xmm2
+ xor edx,ecx
+ mov edi,DWORD [8+esp]
+db 102,15,58,15,227,4
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+db 102,15,58,15,249,4
+ mov DWORD [esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ movdqa xmm5,xmm4
+ ror edx,6
+ mov ecx,eax
+ movdqa xmm6,xmm4
+ add edx,edi
+ mov edi,DWORD [20+esp]
+ psrld xmm4,3
+ mov esi,eax
+ ror ecx,9
+ paddd xmm3,xmm7
+ mov DWORD [16+esp],eax
+ xor ecx,eax
+ psrld xmm6,7
+ xor eax,edi
+ add edx,DWORD [12+esp]
+ ror ecx,11
+ and ebx,eax
+ pshufd xmm7,xmm2,250
+ xor ecx,esi
+ add edx,DWORD [80+esp]
+ pslld xmm5,14
+ xor ebx,edi
+ ror ecx,2
+ pxor xmm4,xmm6
+ add ebx,edx
+ add edx,DWORD [28+esp]
+ psrld xmm6,11
+ add ebx,ecx
+ mov ecx,edx
+ ror edx,14
+ pxor xmm4,xmm5
+ mov esi,DWORD [esp]
+ xor edx,ecx
+ pslld xmm5,11
+ mov edi,DWORD [4+esp]
+ xor esi,edi
+ ror edx,5
+ pxor xmm4,xmm6
+ and esi,ecx
+ mov DWORD [28+esp],ecx
+ movdqa xmm6,xmm7
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ pxor xmm4,xmm5
+ mov ecx,ebx
+ add edx,edi
+ psrld xmm7,10
+ mov edi,DWORD [16+esp]
+ mov esi,ebx
+ ror ecx,9
+ paddd xmm3,xmm4
+ mov DWORD [12+esp],ebx
+ xor ecx,ebx
+ psrlq xmm6,17
+ xor ebx,edi
+ add edx,DWORD [8+esp]
+ ror ecx,11
+ pxor xmm7,xmm6
+ and eax,ebx
+ xor ecx,esi
+ psrlq xmm6,2
+ add edx,DWORD [84+esp]
+ xor eax,edi
+ ror ecx,2
+ pxor xmm7,xmm6
+ add eax,edx
+ add edx,DWORD [24+esp]
+ pshufd xmm7,xmm7,128
+ add eax,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [28+esp]
+ xor edx,ecx
+ mov edi,DWORD [esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ psrldq xmm7,8
+ mov DWORD [24+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ paddd xmm3,xmm7
+ ror edx,6
+ mov ecx,eax
+ add edx,edi
+ mov edi,DWORD [12+esp]
+ mov esi,eax
+ ror ecx,9
+ mov DWORD [8+esp],eax
+ pshufd xmm7,xmm3,80
+ xor ecx,eax
+ xor eax,edi
+ add edx,DWORD [4+esp]
+ movdqa xmm6,xmm7
+ ror ecx,11
+ psrld xmm7,10
+ and ebx,eax
+ psrlq xmm6,17
+ xor ecx,esi
+ add edx,DWORD [88+esp]
+ xor ebx,edi
+ ror ecx,2
+ pxor xmm7,xmm6
+ add ebx,edx
+ add edx,DWORD [20+esp]
+ psrlq xmm6,2
+ add ebx,ecx
+ mov ecx,edx
+ ror edx,14
+ pxor xmm7,xmm6
+ mov esi,DWORD [24+esp]
+ xor edx,ecx
+ mov edi,DWORD [28+esp]
+ pshufd xmm7,xmm7,8
+ xor esi,edi
+ ror edx,5
+ movdqa xmm6,[48+ebp]
+ and esi,ecx
+ mov DWORD [20+esp],ecx
+ pslldq xmm7,8
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,ebx
+ add edx,edi
+ mov edi,DWORD [8+esp]
+ mov esi,ebx
+ ror ecx,9
+ paddd xmm3,xmm7
+ mov DWORD [4+esp],ebx
+ xor ecx,ebx
+ xor ebx,edi
+ add edx,DWORD [esp]
+ paddd xmm6,xmm3
+ ror ecx,11
+ and eax,ebx
+ xor ecx,esi
+ add edx,DWORD [92+esp]
+ xor eax,edi
+ ror ecx,2
+ add eax,edx
+ add edx,DWORD [16+esp]
+ add eax,ecx
+ movdqa [80+esp],xmm6
+ cmp DWORD [64+ebp],66051
+ jne NEAR L$012ssse3_00_47
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [20+esp]
+ xor edx,ecx
+ mov edi,DWORD [24+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [16+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add edx,edi
+ mov edi,DWORD [4+esp]
+ mov esi,eax
+ ror ecx,9
+ mov DWORD [esp],eax
+ xor ecx,eax
+ xor eax,edi
+ add edx,DWORD [28+esp]
+ ror ecx,11
+ and ebx,eax
+ xor ecx,esi
+ add edx,DWORD [32+esp]
+ xor ebx,edi
+ ror ecx,2
+ add ebx,edx
+ add edx,DWORD [12+esp]
+ add ebx,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [16+esp]
+ xor edx,ecx
+ mov edi,DWORD [20+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [12+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,ebx
+ add edx,edi
+ mov edi,DWORD [esp]
+ mov esi,ebx
+ ror ecx,9
+ mov DWORD [28+esp],ebx
+ xor ecx,ebx
+ xor ebx,edi
+ add edx,DWORD [24+esp]
+ ror ecx,11
+ and eax,ebx
+ xor ecx,esi
+ add edx,DWORD [36+esp]
+ xor eax,edi
+ ror ecx,2
+ add eax,edx
+ add edx,DWORD [8+esp]
+ add eax,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [12+esp]
+ xor edx,ecx
+ mov edi,DWORD [16+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [8+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add edx,edi
+ mov edi,DWORD [28+esp]
+ mov esi,eax
+ ror ecx,9
+ mov DWORD [24+esp],eax
+ xor ecx,eax
+ xor eax,edi
+ add edx,DWORD [20+esp]
+ ror ecx,11
+ and ebx,eax
+ xor ecx,esi
+ add edx,DWORD [40+esp]
+ xor ebx,edi
+ ror ecx,2
+ add ebx,edx
+ add edx,DWORD [4+esp]
+ add ebx,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [8+esp]
+ xor edx,ecx
+ mov edi,DWORD [12+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [4+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,ebx
+ add edx,edi
+ mov edi,DWORD [24+esp]
+ mov esi,ebx
+ ror ecx,9
+ mov DWORD [20+esp],ebx
+ xor ecx,ebx
+ xor ebx,edi
+ add edx,DWORD [16+esp]
+ ror ecx,11
+ and eax,ebx
+ xor ecx,esi
+ add edx,DWORD [44+esp]
+ xor eax,edi
+ ror ecx,2
+ add eax,edx
+ add edx,DWORD [esp]
+ add eax,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [4+esp]
+ xor edx,ecx
+ mov edi,DWORD [8+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add edx,edi
+ mov edi,DWORD [20+esp]
+ mov esi,eax
+ ror ecx,9
+ mov DWORD [16+esp],eax
+ xor ecx,eax
+ xor eax,edi
+ add edx,DWORD [12+esp]
+ ror ecx,11
+ and ebx,eax
+ xor ecx,esi
+ add edx,DWORD [48+esp]
+ xor ebx,edi
+ ror ecx,2
+ add ebx,edx
+ add edx,DWORD [28+esp]
+ add ebx,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [esp]
+ xor edx,ecx
+ mov edi,DWORD [4+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [28+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,ebx
+ add edx,edi
+ mov edi,DWORD [16+esp]
+ mov esi,ebx
+ ror ecx,9
+ mov DWORD [12+esp],ebx
+ xor ecx,ebx
+ xor ebx,edi
+ add edx,DWORD [8+esp]
+ ror ecx,11
+ and eax,ebx
+ xor ecx,esi
+ add edx,DWORD [52+esp]
+ xor eax,edi
+ ror ecx,2
+ add eax,edx
+ add edx,DWORD [24+esp]
+ add eax,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [28+esp]
+ xor edx,ecx
+ mov edi,DWORD [esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [24+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add edx,edi
+ mov edi,DWORD [12+esp]
+ mov esi,eax
+ ror ecx,9
+ mov DWORD [8+esp],eax
+ xor ecx,eax
+ xor eax,edi
+ add edx,DWORD [4+esp]
+ ror ecx,11
+ and ebx,eax
+ xor ecx,esi
+ add edx,DWORD [56+esp]
+ xor ebx,edi
+ ror ecx,2
+ add ebx,edx
+ add edx,DWORD [20+esp]
+ add ebx,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [24+esp]
+ xor edx,ecx
+ mov edi,DWORD [28+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [20+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,ebx
+ add edx,edi
+ mov edi,DWORD [8+esp]
+ mov esi,ebx
+ ror ecx,9
+ mov DWORD [4+esp],ebx
+ xor ecx,ebx
+ xor ebx,edi
+ add edx,DWORD [esp]
+ ror ecx,11
+ and eax,ebx
+ xor ecx,esi
+ add edx,DWORD [60+esp]
+ xor eax,edi
+ ror ecx,2
+ add eax,edx
+ add edx,DWORD [16+esp]
+ add eax,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [20+esp]
+ xor edx,ecx
+ mov edi,DWORD [24+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [16+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add edx,edi
+ mov edi,DWORD [4+esp]
+ mov esi,eax
+ ror ecx,9
+ mov DWORD [esp],eax
+ xor ecx,eax
+ xor eax,edi
+ add edx,DWORD [28+esp]
+ ror ecx,11
+ and ebx,eax
+ xor ecx,esi
+ add edx,DWORD [64+esp]
+ xor ebx,edi
+ ror ecx,2
+ add ebx,edx
+ add edx,DWORD [12+esp]
+ add ebx,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [16+esp]
+ xor edx,ecx
+ mov edi,DWORD [20+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [12+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,ebx
+ add edx,edi
+ mov edi,DWORD [esp]
+ mov esi,ebx
+ ror ecx,9
+ mov DWORD [28+esp],ebx
+ xor ecx,ebx
+ xor ebx,edi
+ add edx,DWORD [24+esp]
+ ror ecx,11
+ and eax,ebx
+ xor ecx,esi
+ add edx,DWORD [68+esp]
+ xor eax,edi
+ ror ecx,2
+ add eax,edx
+ add edx,DWORD [8+esp]
+ add eax,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [12+esp]
+ xor edx,ecx
+ mov edi,DWORD [16+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [8+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add edx,edi
+ mov edi,DWORD [28+esp]
+ mov esi,eax
+ ror ecx,9
+ mov DWORD [24+esp],eax
+ xor ecx,eax
+ xor eax,edi
+ add edx,DWORD [20+esp]
+ ror ecx,11
+ and ebx,eax
+ xor ecx,esi
+ add edx,DWORD [72+esp]
+ xor ebx,edi
+ ror ecx,2
+ add ebx,edx
+ add edx,DWORD [4+esp]
+ add ebx,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [8+esp]
+ xor edx,ecx
+ mov edi,DWORD [12+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [4+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,ebx
+ add edx,edi
+ mov edi,DWORD [24+esp]
+ mov esi,ebx
+ ror ecx,9
+ mov DWORD [20+esp],ebx
+ xor ecx,ebx
+ xor ebx,edi
+ add edx,DWORD [16+esp]
+ ror ecx,11
+ and eax,ebx
+ xor ecx,esi
+ add edx,DWORD [76+esp]
+ xor eax,edi
+ ror ecx,2
+ add eax,edx
+ add edx,DWORD [esp]
+ add eax,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [4+esp]
+ xor edx,ecx
+ mov edi,DWORD [8+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add edx,edi
+ mov edi,DWORD [20+esp]
+ mov esi,eax
+ ror ecx,9
+ mov DWORD [16+esp],eax
+ xor ecx,eax
+ xor eax,edi
+ add edx,DWORD [12+esp]
+ ror ecx,11
+ and ebx,eax
+ xor ecx,esi
+ add edx,DWORD [80+esp]
+ xor ebx,edi
+ ror ecx,2
+ add ebx,edx
+ add edx,DWORD [28+esp]
+ add ebx,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [esp]
+ xor edx,ecx
+ mov edi,DWORD [4+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [28+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,ebx
+ add edx,edi
+ mov edi,DWORD [16+esp]
+ mov esi,ebx
+ ror ecx,9
+ mov DWORD [12+esp],ebx
+ xor ecx,ebx
+ xor ebx,edi
+ add edx,DWORD [8+esp]
+ ror ecx,11
+ and eax,ebx
+ xor ecx,esi
+ add edx,DWORD [84+esp]
+ xor eax,edi
+ ror ecx,2
+ add eax,edx
+ add edx,DWORD [24+esp]
+ add eax,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [28+esp]
+ xor edx,ecx
+ mov edi,DWORD [esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [24+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,eax
+ add edx,edi
+ mov edi,DWORD [12+esp]
+ mov esi,eax
+ ror ecx,9
+ mov DWORD [8+esp],eax
+ xor ecx,eax
+ xor eax,edi
+ add edx,DWORD [4+esp]
+ ror ecx,11
+ and ebx,eax
+ xor ecx,esi
+ add edx,DWORD [88+esp]
+ xor ebx,edi
+ ror ecx,2
+ add ebx,edx
+ add edx,DWORD [20+esp]
+ add ebx,ecx
+ mov ecx,edx
+ ror edx,14
+ mov esi,DWORD [24+esp]
+ xor edx,ecx
+ mov edi,DWORD [28+esp]
+ xor esi,edi
+ ror edx,5
+ and esi,ecx
+ mov DWORD [20+esp],ecx
+ xor edx,ecx
+ xor edi,esi
+ ror edx,6
+ mov ecx,ebx
+ add edx,edi
+ mov edi,DWORD [8+esp]
+ mov esi,ebx
+ ror ecx,9
+ mov DWORD [4+esp],ebx
+ xor ecx,ebx
+ xor ebx,edi
+ add edx,DWORD [esp]
+ ror ecx,11
+ and eax,ebx
+ xor ecx,esi
+ add edx,DWORD [92+esp]
+ xor eax,edi
+ ror ecx,2
+ add eax,edx
+ add edx,DWORD [16+esp]
+ add eax,ecx
+ mov esi,DWORD [96+esp]
+ xor ebx,edi
+ mov ecx,DWORD [12+esp]
+ add eax,DWORD [esi]
+ add ebx,DWORD [4+esi]
+ add edi,DWORD [8+esi]
+ add ecx,DWORD [12+esi]
+ mov DWORD [esi],eax
+ mov DWORD [4+esi],ebx
+ mov DWORD [8+esi],edi
+ mov DWORD [12+esi],ecx
+ mov DWORD [4+esp],ebx
+ xor ebx,edi
+ mov DWORD [8+esp],edi
+ mov DWORD [12+esp],ecx
+ mov edi,DWORD [20+esp]
+ mov ecx,DWORD [24+esp]
+ add edx,DWORD [16+esi]
+ add edi,DWORD [20+esi]
+ add ecx,DWORD [24+esi]
+ mov DWORD [16+esi],edx
+ mov DWORD [20+esi],edi
+ mov DWORD [20+esp],edi
+ mov edi,DWORD [28+esp]
+ mov DWORD [24+esi],ecx
+ add edi,DWORD [28+esi]
+ mov DWORD [24+esp],ecx
+ mov DWORD [28+esi],edi
+ mov DWORD [28+esp],edi
+ mov edi,DWORD [100+esp]
+ movdqa xmm7,[64+ebp]
+ sub ebp,192
+ cmp edi,DWORD [104+esp]
+ jb NEAR L$011grand_ssse3
+ mov esp,DWORD [108+esp]
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+segment .bss
+common _OPENSSL_ia32cap_P 16
diff --git a/win-x86/crypto/sha/sha512-586.asm b/win-x86/crypto/sha/sha512-586.asm
new file mode 100644
index 0000000..88ed0b3
--- /dev/null
+++ b/win-x86/crypto/sha/sha512-586.asm
@@ -0,0 +1,2843 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+%ifdef __YASM_VERSION_ID__
+%if __YASM_VERSION_ID__ < 01010000h
+%error yasm version 1.1.0 or later needed.
+%endif
+; Yasm automatically includes .00 and complains about redefining it.
+; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
+%else
+$@feat.00 equ 1
+%endif
+section .text code align=64
+%else
+section .text code
+%endif
+;extern _OPENSSL_ia32cap_P
+global _sha512_block_data_order
+align 16
+_sha512_block_data_order:
+L$_sha512_block_data_order_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov esi,DWORD [20+esp]
+ mov edi,DWORD [24+esp]
+ mov eax,DWORD [28+esp]
+ mov ebx,esp
+ call L$000pic_point
+L$000pic_point:
+ pop ebp
+ lea ebp,[(L$001K512-L$000pic_point)+ebp]
+ sub esp,16
+ and esp,-64
+ shl eax,7
+ add eax,edi
+ mov DWORD [esp],esi
+ mov DWORD [4+esp],edi
+ mov DWORD [8+esp],eax
+ mov DWORD [12+esp],ebx
+ lea edx,[_OPENSSL_ia32cap_P]
+ mov ecx,DWORD [edx]
+ test ecx,67108864
+ jz NEAR L$002loop_x86
+ mov edx,DWORD [4+edx]
+ movq mm0,[esi]
+ and ecx,16777216
+ movq mm1,[8+esi]
+ and edx,512
+ movq mm2,[16+esi]
+ or ecx,edx
+ movq mm3,[24+esi]
+ movq mm4,[32+esi]
+ movq mm5,[40+esi]
+ movq mm6,[48+esi]
+ movq mm7,[56+esi]
+ cmp ecx,16777728
+ je NEAR L$003SSSE3
+ sub esp,80
+ jmp NEAR L$004loop_sse2
+align 16
+L$004loop_sse2:
+ movq [8+esp],mm1
+ movq [16+esp],mm2
+ movq [24+esp],mm3
+ movq [40+esp],mm5
+ movq [48+esp],mm6
+ pxor mm2,mm1
+ movq [56+esp],mm7
+ movq mm3,mm0
+ mov eax,DWORD [edi]
+ mov ebx,DWORD [4+edi]
+ add edi,8
+ mov edx,15
+ bswap eax
+ bswap ebx
+ jmp NEAR L$00500_14_sse2
+align 16
+L$00500_14_sse2:
+ movd mm1,eax
+ mov eax,DWORD [edi]
+ movd mm7,ebx
+ mov ebx,DWORD [4+edi]
+ add edi,8
+ bswap eax
+ bswap ebx
+ punpckldq mm7,mm1
+ movq mm1,mm4
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [32+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ movq mm0,mm3
+ movq [72+esp],mm7
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[56+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ paddq mm7,[ebp]
+ pxor mm3,mm4
+ movq mm4,[24+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[8+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ sub esp,8
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[40+esp]
+ paddq mm3,mm2
+ movq mm2,mm0
+ add ebp,8
+ paddq mm3,mm6
+ movq mm6,[48+esp]
+ dec edx
+ jnz NEAR L$00500_14_sse2
+ movd mm1,eax
+ movd mm7,ebx
+ punpckldq mm7,mm1
+ movq mm1,mm4
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [32+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ movq mm0,mm3
+ movq [72+esp],mm7
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[56+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ paddq mm7,[ebp]
+ pxor mm3,mm4
+ movq mm4,[24+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[8+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ sub esp,8
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm7,[192+esp]
+ paddq mm3,mm2
+ movq mm2,mm0
+ add ebp,8
+ paddq mm3,mm6
+ pxor mm0,mm0
+ mov edx,32
+ jmp NEAR L$00616_79_sse2
+align 16
+L$00616_79_sse2:
+ movq mm5,[88+esp]
+ movq mm1,mm7
+ psrlq mm7,1
+ movq mm6,mm5
+ psrlq mm5,6
+ psllq mm1,56
+ paddq mm0,mm3
+ movq mm3,mm7
+ psrlq mm7,6
+ pxor mm3,mm1
+ psllq mm1,7
+ pxor mm3,mm7
+ psrlq mm7,1
+ pxor mm3,mm1
+ movq mm1,mm5
+ psrlq mm5,13
+ pxor mm7,mm3
+ psllq mm6,3
+ pxor mm1,mm5
+ paddq mm7,[200+esp]
+ pxor mm1,mm6
+ psrlq mm5,42
+ paddq mm7,[128+esp]
+ pxor mm1,mm5
+ psllq mm6,42
+ movq mm5,[40+esp]
+ pxor mm1,mm6
+ movq mm6,[48+esp]
+ paddq mm7,mm1
+ movq mm1,mm4
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [32+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ movq [72+esp],mm7
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[56+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ paddq mm7,[ebp]
+ pxor mm3,mm4
+ movq mm4,[24+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[8+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ sub esp,8
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm7,[192+esp]
+ paddq mm2,mm6
+ add ebp,8
+ movq mm5,[88+esp]
+ movq mm1,mm7
+ psrlq mm7,1
+ movq mm6,mm5
+ psrlq mm5,6
+ psllq mm1,56
+ paddq mm2,mm3
+ movq mm3,mm7
+ psrlq mm7,6
+ pxor mm3,mm1
+ psllq mm1,7
+ pxor mm3,mm7
+ psrlq mm7,1
+ pxor mm3,mm1
+ movq mm1,mm5
+ psrlq mm5,13
+ pxor mm7,mm3
+ psllq mm6,3
+ pxor mm1,mm5
+ paddq mm7,[200+esp]
+ pxor mm1,mm6
+ psrlq mm5,42
+ paddq mm7,[128+esp]
+ pxor mm1,mm5
+ psllq mm6,42
+ movq mm5,[40+esp]
+ pxor mm1,mm6
+ movq mm6,[48+esp]
+ paddq mm7,mm1
+ movq mm1,mm4
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [32+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ movq [72+esp],mm7
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[56+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ paddq mm7,[ebp]
+ pxor mm3,mm4
+ movq mm4,[24+esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[8+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ sub esp,8
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm7,[192+esp]
+ paddq mm0,mm6
+ add ebp,8
+ dec edx
+ jnz NEAR L$00616_79_sse2
+ paddq mm0,mm3
+ movq mm1,[8+esp]
+ movq mm3,[24+esp]
+ movq mm5,[40+esp]
+ movq mm6,[48+esp]
+ movq mm7,[56+esp]
+ pxor mm2,mm1
+ paddq mm0,[esi]
+ paddq mm1,[8+esi]
+ paddq mm2,[16+esi]
+ paddq mm3,[24+esi]
+ paddq mm4,[32+esi]
+ paddq mm5,[40+esi]
+ paddq mm6,[48+esi]
+ paddq mm7,[56+esi]
+ mov eax,640
+ movq [esi],mm0
+ movq [8+esi],mm1
+ movq [16+esi],mm2
+ movq [24+esi],mm3
+ movq [32+esi],mm4
+ movq [40+esi],mm5
+ movq [48+esi],mm6
+ movq [56+esi],mm7
+ lea esp,[eax*1+esp]
+ sub ebp,eax
+ cmp edi,DWORD [88+esp]
+ jb NEAR L$004loop_sse2
+ mov esp,DWORD [92+esp]
+ emms
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+align 32
+L$003SSSE3:
+ lea edx,[esp-64]
+ sub esp,256
+ movdqa xmm1,[640+ebp]
+ movdqu xmm0,[edi]
+db 102,15,56,0,193
+ movdqa xmm3,[ebp]
+ movdqa xmm2,xmm1
+ movdqu xmm1,[16+edi]
+ paddq xmm3,xmm0
+db 102,15,56,0,202
+ movdqa [edx-128],xmm3
+ movdqa xmm4,[16+ebp]
+ movdqa xmm3,xmm2
+ movdqu xmm2,[32+edi]
+ paddq xmm4,xmm1
+db 102,15,56,0,211
+ movdqa [edx-112],xmm4
+ movdqa xmm5,[32+ebp]
+ movdqa xmm4,xmm3
+ movdqu xmm3,[48+edi]
+ paddq xmm5,xmm2
+db 102,15,56,0,220
+ movdqa [edx-96],xmm5
+ movdqa xmm6,[48+ebp]
+ movdqa xmm5,xmm4
+ movdqu xmm4,[64+edi]
+ paddq xmm6,xmm3
+db 102,15,56,0,229
+ movdqa [edx-80],xmm6
+ movdqa xmm7,[64+ebp]
+ movdqa xmm6,xmm5
+ movdqu xmm5,[80+edi]
+ paddq xmm7,xmm4
+db 102,15,56,0,238
+ movdqa [edx-64],xmm7
+ movdqa [edx],xmm0
+ movdqa xmm0,[80+ebp]
+ movdqa xmm7,xmm6
+ movdqu xmm6,[96+edi]
+ paddq xmm0,xmm5
+db 102,15,56,0,247
+ movdqa [edx-48],xmm0
+ movdqa [16+edx],xmm1
+ movdqa xmm1,[96+ebp]
+ movdqa xmm0,xmm7
+ movdqu xmm7,[112+edi]
+ paddq xmm1,xmm6
+db 102,15,56,0,248
+ movdqa [edx-32],xmm1
+ movdqa [32+edx],xmm2
+ movdqa xmm2,[112+ebp]
+ movdqa xmm0,[edx]
+ paddq xmm2,xmm7
+ movdqa [edx-16],xmm2
+ nop
+align 32
+L$007loop_ssse3:
+ movdqa xmm2,[16+edx]
+ movdqa [48+edx],xmm3
+ lea ebp,[128+ebp]
+ movq [8+esp],mm1
+ mov ebx,edi
+ movq [16+esp],mm2
+ lea edi,[128+edi]
+ movq [24+esp],mm3
+ cmp edi,eax
+ movq [40+esp],mm5
+ cmovb ebx,edi
+ movq [48+esp],mm6
+ mov ecx,4
+ pxor mm2,mm1
+ movq [56+esp],mm7
+ pxor mm3,mm3
+ jmp NEAR L$00800_47_ssse3
+align 32
+L$00800_47_ssse3:
+ movdqa xmm3,xmm5
+ movdqa xmm1,xmm2
+db 102,15,58,15,208,8
+ movdqa [edx],xmm4
+db 102,15,58,15,220,8
+ movdqa xmm4,xmm2
+ psrlq xmm2,7
+ paddq xmm0,xmm3
+ movdqa xmm3,xmm4
+ psrlq xmm4,1
+ psllq xmm3,56
+ pxor xmm2,xmm4
+ psrlq xmm4,7
+ pxor xmm2,xmm3
+ psllq xmm3,7
+ pxor xmm2,xmm4
+ movdqa xmm4,xmm7
+ pxor xmm2,xmm3
+ movdqa xmm3,xmm7
+ psrlq xmm4,6
+ paddq xmm0,xmm2
+ movdqa xmm2,xmm7
+ psrlq xmm3,19
+ psllq xmm2,3
+ pxor xmm4,xmm3
+ psrlq xmm3,42
+ pxor xmm4,xmm2
+ psllq xmm2,42
+ pxor xmm4,xmm3
+ movdqa xmm3,[32+edx]
+ pxor xmm4,xmm2
+ movdqa xmm2,[ebp]
+ movq mm1,mm4
+ paddq xmm0,xmm4
+ movq mm7,[edx-128]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [32+esp],mm4
+ paddq xmm2,xmm0
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm0,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[56+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[24+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[8+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[32+esp]
+ paddq mm2,mm6
+ movq mm6,[40+esp]
+ movq mm1,mm4
+ movq mm7,[edx-120]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [24+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm2,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [56+esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[48+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[16+esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm5,[24+esp]
+ paddq mm0,mm6
+ movq mm6,[32+esp]
+ movdqa [edx-128],xmm2
+ movdqa xmm4,xmm6
+ movdqa xmm2,xmm3
+db 102,15,58,15,217,8
+ movdqa [16+edx],xmm5
+db 102,15,58,15,229,8
+ movdqa xmm5,xmm3
+ psrlq xmm3,7
+ paddq xmm1,xmm4
+ movdqa xmm4,xmm5
+ psrlq xmm5,1
+ psllq xmm4,56
+ pxor xmm3,xmm5
+ psrlq xmm5,7
+ pxor xmm3,xmm4
+ psllq xmm4,7
+ pxor xmm3,xmm5
+ movdqa xmm5,xmm0
+ pxor xmm3,xmm4
+ movdqa xmm4,xmm0
+ psrlq xmm5,6
+ paddq xmm1,xmm3
+ movdqa xmm3,xmm0
+ psrlq xmm4,19
+ psllq xmm3,3
+ pxor xmm5,xmm4
+ psrlq xmm4,42
+ pxor xmm5,xmm3
+ psllq xmm3,42
+ pxor xmm5,xmm4
+ movdqa xmm4,[48+edx]
+ pxor xmm5,xmm3
+ movdqa xmm3,[16+ebp]
+ movq mm1,mm4
+ paddq xmm1,xmm5
+ movq mm7,[edx-112]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [16+esp],mm4
+ paddq xmm3,xmm1
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm0,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [48+esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[40+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[8+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[56+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[16+esp]
+ paddq mm2,mm6
+ movq mm6,[24+esp]
+ movq mm1,mm4
+ movq mm7,[edx-104]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [8+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm2,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [40+esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[32+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[48+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm5,[8+esp]
+ paddq mm0,mm6
+ movq mm6,[16+esp]
+ movdqa [edx-112],xmm3
+ movdqa xmm5,xmm7
+ movdqa xmm3,xmm4
+db 102,15,58,15,226,8
+ movdqa [32+edx],xmm6
+db 102,15,58,15,238,8
+ movdqa xmm6,xmm4
+ psrlq xmm4,7
+ paddq xmm2,xmm5
+ movdqa xmm5,xmm6
+ psrlq xmm6,1
+ psllq xmm5,56
+ pxor xmm4,xmm6
+ psrlq xmm6,7
+ pxor xmm4,xmm5
+ psllq xmm5,7
+ pxor xmm4,xmm6
+ movdqa xmm6,xmm1
+ pxor xmm4,xmm5
+ movdqa xmm5,xmm1
+ psrlq xmm6,6
+ paddq xmm2,xmm4
+ movdqa xmm4,xmm1
+ psrlq xmm5,19
+ psllq xmm4,3
+ pxor xmm6,xmm5
+ psrlq xmm5,42
+ pxor xmm6,xmm4
+ psllq xmm4,42
+ pxor xmm6,xmm5
+ movdqa xmm5,[edx]
+ pxor xmm6,xmm4
+ movdqa xmm4,[32+ebp]
+ movq mm1,mm4
+ paddq xmm2,xmm6
+ movq mm7,[edx-96]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [esp],mm4
+ paddq xmm4,xmm2
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm0,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [32+esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[24+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[56+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[40+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[esp]
+ paddq mm2,mm6
+ movq mm6,[8+esp]
+ movq mm1,mm4
+ movq mm7,[edx-88]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [56+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm2,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [24+esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[16+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[48+esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[32+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm5,[56+esp]
+ paddq mm0,mm6
+ movq mm6,[esp]
+ movdqa [edx-96],xmm4
+ movdqa xmm6,xmm0
+ movdqa xmm4,xmm5
+db 102,15,58,15,235,8
+ movdqa [48+edx],xmm7
+db 102,15,58,15,247,8
+ movdqa xmm7,xmm5
+ psrlq xmm5,7
+ paddq xmm3,xmm6
+ movdqa xmm6,xmm7
+ psrlq xmm7,1
+ psllq xmm6,56
+ pxor xmm5,xmm7
+ psrlq xmm7,7
+ pxor xmm5,xmm6
+ psllq xmm6,7
+ pxor xmm5,xmm7
+ movdqa xmm7,xmm2
+ pxor xmm5,xmm6
+ movdqa xmm6,xmm2
+ psrlq xmm7,6
+ paddq xmm3,xmm5
+ movdqa xmm5,xmm2
+ psrlq xmm6,19
+ psllq xmm5,3
+ pxor xmm7,xmm6
+ psrlq xmm6,42
+ pxor xmm7,xmm5
+ psllq xmm5,42
+ pxor xmm7,xmm6
+ movdqa xmm6,[16+edx]
+ pxor xmm7,xmm5
+ movdqa xmm5,[48+ebp]
+ movq mm1,mm4
+ paddq xmm3,xmm7
+ movq mm7,[edx-80]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [48+esp],mm4
+ paddq xmm5,xmm3
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm0,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [16+esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[8+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[40+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[24+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[48+esp]
+ paddq mm2,mm6
+ movq mm6,[56+esp]
+ movq mm1,mm4
+ movq mm7,[edx-72]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [40+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm2,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [8+esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[32+esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[16+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm5,[40+esp]
+ paddq mm0,mm6
+ movq mm6,[48+esp]
+ movdqa [edx-80],xmm5
+ movdqa xmm7,xmm1
+ movdqa xmm5,xmm6
+db 102,15,58,15,244,8
+ movdqa [edx],xmm0
+db 102,15,58,15,248,8
+ movdqa xmm0,xmm6
+ psrlq xmm6,7
+ paddq xmm4,xmm7
+ movdqa xmm7,xmm0
+ psrlq xmm0,1
+ psllq xmm7,56
+ pxor xmm6,xmm0
+ psrlq xmm0,7
+ pxor xmm6,xmm7
+ psllq xmm7,7
+ pxor xmm6,xmm0
+ movdqa xmm0,xmm3
+ pxor xmm6,xmm7
+ movdqa xmm7,xmm3
+ psrlq xmm0,6
+ paddq xmm4,xmm6
+ movdqa xmm6,xmm3
+ psrlq xmm7,19
+ psllq xmm6,3
+ pxor xmm0,xmm7
+ psrlq xmm7,42
+ pxor xmm0,xmm6
+ psllq xmm6,42
+ pxor xmm0,xmm7
+ movdqa xmm7,[32+edx]
+ pxor xmm0,xmm6
+ movdqa xmm6,[64+ebp]
+ movq mm1,mm4
+ paddq xmm4,xmm0
+ movq mm7,[edx-64]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [32+esp],mm4
+ paddq xmm6,xmm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm0,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[56+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[24+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[8+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[32+esp]
+ paddq mm2,mm6
+ movq mm6,[40+esp]
+ movq mm1,mm4
+ movq mm7,[edx-56]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [24+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm2,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [56+esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[48+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[16+esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm5,[24+esp]
+ paddq mm0,mm6
+ movq mm6,[32+esp]
+ movdqa [edx-64],xmm6
+ movdqa xmm0,xmm2
+ movdqa xmm6,xmm7
+db 102,15,58,15,253,8
+ movdqa [16+edx],xmm1
+db 102,15,58,15,193,8
+ movdqa xmm1,xmm7
+ psrlq xmm7,7
+ paddq xmm5,xmm0
+ movdqa xmm0,xmm1
+ psrlq xmm1,1
+ psllq xmm0,56
+ pxor xmm7,xmm1
+ psrlq xmm1,7
+ pxor xmm7,xmm0
+ psllq xmm0,7
+ pxor xmm7,xmm1
+ movdqa xmm1,xmm4
+ pxor xmm7,xmm0
+ movdqa xmm0,xmm4
+ psrlq xmm1,6
+ paddq xmm5,xmm7
+ movdqa xmm7,xmm4
+ psrlq xmm0,19
+ psllq xmm7,3
+ pxor xmm1,xmm0
+ psrlq xmm0,42
+ pxor xmm1,xmm7
+ psllq xmm7,42
+ pxor xmm1,xmm0
+ movdqa xmm0,[48+edx]
+ pxor xmm1,xmm7
+ movdqa xmm7,[80+ebp]
+ movq mm1,mm4
+ paddq xmm5,xmm1
+ movq mm7,[edx-48]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [16+esp],mm4
+ paddq xmm7,xmm5
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm0,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [48+esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[40+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[8+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[56+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[16+esp]
+ paddq mm2,mm6
+ movq mm6,[24+esp]
+ movq mm1,mm4
+ movq mm7,[edx-40]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [8+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm2,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [40+esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[32+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[48+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm5,[8+esp]
+ paddq mm0,mm6
+ movq mm6,[16+esp]
+ movdqa [edx-48],xmm7
+ movdqa xmm1,xmm3
+ movdqa xmm7,xmm0
+db 102,15,58,15,198,8
+ movdqa [32+edx],xmm2
+db 102,15,58,15,202,8
+ movdqa xmm2,xmm0
+ psrlq xmm0,7
+ paddq xmm6,xmm1
+ movdqa xmm1,xmm2
+ psrlq xmm2,1
+ psllq xmm1,56
+ pxor xmm0,xmm2
+ psrlq xmm2,7
+ pxor xmm0,xmm1
+ psllq xmm1,7
+ pxor xmm0,xmm2
+ movdqa xmm2,xmm5
+ pxor xmm0,xmm1
+ movdqa xmm1,xmm5
+ psrlq xmm2,6
+ paddq xmm6,xmm0
+ movdqa xmm0,xmm5
+ psrlq xmm1,19
+ psllq xmm0,3
+ pxor xmm2,xmm1
+ psrlq xmm1,42
+ pxor xmm2,xmm0
+ psllq xmm0,42
+ pxor xmm2,xmm1
+ movdqa xmm1,[edx]
+ pxor xmm2,xmm0
+ movdqa xmm0,[96+ebp]
+ movq mm1,mm4
+ paddq xmm6,xmm2
+ movq mm7,[edx-32]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [esp],mm4
+ paddq xmm0,xmm6
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm0,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [32+esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[24+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[56+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[40+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[esp]
+ paddq mm2,mm6
+ movq mm6,[8+esp]
+ movq mm1,mm4
+ movq mm7,[edx-24]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [56+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm2,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [24+esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[16+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[48+esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[32+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm5,[56+esp]
+ paddq mm0,mm6
+ movq mm6,[esp]
+ movdqa [edx-32],xmm0
+ movdqa xmm2,xmm4
+ movdqa xmm0,xmm1
+db 102,15,58,15,207,8
+ movdqa [48+edx],xmm3
+db 102,15,58,15,211,8
+ movdqa xmm3,xmm1
+ psrlq xmm1,7
+ paddq xmm7,xmm2
+ movdqa xmm2,xmm3
+ psrlq xmm3,1
+ psllq xmm2,56
+ pxor xmm1,xmm3
+ psrlq xmm3,7
+ pxor xmm1,xmm2
+ psllq xmm2,7
+ pxor xmm1,xmm3
+ movdqa xmm3,xmm6
+ pxor xmm1,xmm2
+ movdqa xmm2,xmm6
+ psrlq xmm3,6
+ paddq xmm7,xmm1
+ movdqa xmm1,xmm6
+ psrlq xmm2,19
+ psllq xmm1,3
+ pxor xmm3,xmm2
+ psrlq xmm2,42
+ pxor xmm3,xmm1
+ psllq xmm1,42
+ pxor xmm3,xmm2
+ movdqa xmm2,[16+edx]
+ pxor xmm3,xmm1
+ movdqa xmm1,[112+ebp]
+ movq mm1,mm4
+ paddq xmm7,xmm3
+ movq mm7,[edx-16]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [48+esp],mm4
+ paddq xmm1,xmm7
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm0,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [16+esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[8+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[40+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[24+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[48+esp]
+ paddq mm2,mm6
+ movq mm6,[56+esp]
+ movq mm1,mm4
+ movq mm7,[edx-8]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [40+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm2,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [8+esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[32+esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[16+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm5,[40+esp]
+ paddq mm0,mm6
+ movq mm6,[48+esp]
+ movdqa [edx-16],xmm1
+ lea ebp,[128+ebp]
+ dec ecx
+ jnz NEAR L$00800_47_ssse3
+ movdqa xmm1,[ebp]
+ lea ebp,[ebp-640]
+ movdqu xmm0,[ebx]
+db 102,15,56,0,193
+ movdqa xmm3,[ebp]
+ movdqa xmm2,xmm1
+ movdqu xmm1,[16+ebx]
+ paddq xmm3,xmm0
+db 102,15,56,0,202
+ movq mm1,mm4
+ movq mm7,[edx-128]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [32+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm0,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[56+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[24+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[8+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[32+esp]
+ paddq mm2,mm6
+ movq mm6,[40+esp]
+ movq mm1,mm4
+ movq mm7,[edx-120]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [24+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm2,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [56+esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[48+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[16+esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm5,[24+esp]
+ paddq mm0,mm6
+ movq mm6,[32+esp]
+ movdqa [edx-128],xmm3
+ movdqa xmm4,[16+ebp]
+ movdqa xmm3,xmm2
+ movdqu xmm2,[32+ebx]
+ paddq xmm4,xmm1
+db 102,15,56,0,211
+ movq mm1,mm4
+ movq mm7,[edx-112]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [16+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm0,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [48+esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[40+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[8+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[56+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[16+esp]
+ paddq mm2,mm6
+ movq mm6,[24+esp]
+ movq mm1,mm4
+ movq mm7,[edx-104]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [8+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm2,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [40+esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[32+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[48+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm5,[8+esp]
+ paddq mm0,mm6
+ movq mm6,[16+esp]
+ movdqa [edx-112],xmm4
+ movdqa xmm5,[32+ebp]
+ movdqa xmm4,xmm3
+ movdqu xmm3,[48+ebx]
+ paddq xmm5,xmm2
+db 102,15,56,0,220
+ movq mm1,mm4
+ movq mm7,[edx-96]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm0,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [32+esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[24+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[56+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[40+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[esp]
+ paddq mm2,mm6
+ movq mm6,[8+esp]
+ movq mm1,mm4
+ movq mm7,[edx-88]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [56+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm2,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [24+esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[16+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[48+esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[32+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm5,[56+esp]
+ paddq mm0,mm6
+ movq mm6,[esp]
+ movdqa [edx-96],xmm5
+ movdqa xmm6,[48+ebp]
+ movdqa xmm5,xmm4
+ movdqu xmm4,[64+ebx]
+ paddq xmm6,xmm3
+db 102,15,56,0,229
+ movq mm1,mm4
+ movq mm7,[edx-80]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [48+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm0,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [16+esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[8+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[40+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[24+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[48+esp]
+ paddq mm2,mm6
+ movq mm6,[56+esp]
+ movq mm1,mm4
+ movq mm7,[edx-72]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [40+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm2,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [8+esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[32+esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[16+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm5,[40+esp]
+ paddq mm0,mm6
+ movq mm6,[48+esp]
+ movdqa [edx-80],xmm6
+ movdqa xmm7,[64+ebp]
+ movdqa xmm6,xmm5
+ movdqu xmm5,[80+ebx]
+ paddq xmm7,xmm4
+db 102,15,56,0,238
+ movq mm1,mm4
+ movq mm7,[edx-64]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [32+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm0,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[56+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[24+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[8+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[32+esp]
+ paddq mm2,mm6
+ movq mm6,[40+esp]
+ movq mm1,mm4
+ movq mm7,[edx-56]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [24+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm2,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [56+esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[48+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[16+esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm5,[24+esp]
+ paddq mm0,mm6
+ movq mm6,[32+esp]
+ movdqa [edx-64],xmm7
+ movdqa [edx],xmm0
+ movdqa xmm0,[80+ebp]
+ movdqa xmm7,xmm6
+ movdqu xmm6,[96+ebx]
+ paddq xmm0,xmm5
+db 102,15,56,0,247
+ movq mm1,mm4
+ movq mm7,[edx-48]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [16+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm0,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [48+esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[40+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[8+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[56+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[16+esp]
+ paddq mm2,mm6
+ movq mm6,[24+esp]
+ movq mm1,mm4
+ movq mm7,[edx-40]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [8+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm2,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [40+esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[32+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[48+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm5,[8+esp]
+ paddq mm0,mm6
+ movq mm6,[16+esp]
+ movdqa [edx-48],xmm0
+ movdqa [16+edx],xmm1
+ movdqa xmm1,[96+ebp]
+ movdqa xmm0,xmm7
+ movdqu xmm7,[112+ebx]
+ paddq xmm1,xmm6
+db 102,15,56,0,248
+ movq mm1,mm4
+ movq mm7,[edx-32]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm0,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [32+esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[24+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[56+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[40+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[esp]
+ paddq mm2,mm6
+ movq mm6,[8+esp]
+ movq mm1,mm4
+ movq mm7,[edx-24]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [56+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm2,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [24+esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[16+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[48+esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[32+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm5,[56+esp]
+ paddq mm0,mm6
+ movq mm6,[esp]
+ movdqa [edx-32],xmm1
+ movdqa [32+edx],xmm2
+ movdqa xmm2,[112+ebp]
+ movdqa xmm0,[edx]
+ paddq xmm2,xmm7
+ movq mm1,mm4
+ movq mm7,[edx-16]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [48+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm0,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [16+esp],mm0
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[8+esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[40+esp]
+ paddq mm3,mm7
+ movq mm5,mm0
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm0
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[24+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm0,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm2,mm0
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm2,mm1
+ pxor mm6,mm7
+ movq mm5,[48+esp]
+ paddq mm2,mm6
+ movq mm6,[56+esp]
+ movq mm1,mm4
+ movq mm7,[edx-8]
+ pxor mm5,mm6
+ psrlq mm1,14
+ movq [40+esp],mm4
+ pand mm5,mm4
+ psllq mm4,23
+ paddq mm2,mm3
+ movq mm3,mm1
+ psrlq mm1,4
+ pxor mm5,mm6
+ pxor mm3,mm4
+ psllq mm4,23
+ pxor mm3,mm1
+ movq [8+esp],mm2
+ paddq mm7,mm5
+ pxor mm3,mm4
+ psrlq mm1,23
+ paddq mm7,[esp]
+ pxor mm3,mm1
+ psllq mm4,4
+ pxor mm3,mm4
+ movq mm4,[32+esp]
+ paddq mm3,mm7
+ movq mm5,mm2
+ psrlq mm5,28
+ paddq mm4,mm3
+ movq mm6,mm2
+ movq mm7,mm5
+ psllq mm6,25
+ movq mm1,[16+esp]
+ psrlq mm5,6
+ pxor mm7,mm6
+ psllq mm6,5
+ pxor mm7,mm5
+ pxor mm2,mm1
+ psrlq mm5,5
+ pxor mm7,mm6
+ pand mm0,mm2
+ psllq mm6,6
+ pxor mm7,mm5
+ pxor mm0,mm1
+ pxor mm6,mm7
+ movq mm5,[40+esp]
+ paddq mm0,mm6
+ movq mm6,[48+esp]
+ movdqa [edx-16],xmm2
+ movq mm1,[8+esp]
+ paddq mm0,mm3
+ movq mm3,[24+esp]
+ movq mm7,[56+esp]
+ pxor mm2,mm1
+ paddq mm0,[esi]
+ paddq mm1,[8+esi]
+ paddq mm2,[16+esi]
+ paddq mm3,[24+esi]
+ paddq mm4,[32+esi]
+ paddq mm5,[40+esi]
+ paddq mm6,[48+esi]
+ paddq mm7,[56+esi]
+ movq [esi],mm0
+ movq [8+esi],mm1
+ movq [16+esi],mm2
+ movq [24+esi],mm3
+ movq [32+esi],mm4
+ movq [40+esi],mm5
+ movq [48+esi],mm6
+ movq [56+esi],mm7
+ cmp edi,eax
+ jb NEAR L$007loop_ssse3
+ mov esp,DWORD [76+edx]
+ emms
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+align 16
+L$002loop_x86:
+ mov eax,DWORD [edi]
+ mov ebx,DWORD [4+edi]
+ mov ecx,DWORD [8+edi]
+ mov edx,DWORD [12+edi]
+ bswap eax
+ bswap ebx
+ bswap ecx
+ bswap edx
+ push eax
+ push ebx
+ push ecx
+ push edx
+ mov eax,DWORD [16+edi]
+ mov ebx,DWORD [20+edi]
+ mov ecx,DWORD [24+edi]
+ mov edx,DWORD [28+edi]
+ bswap eax
+ bswap ebx
+ bswap ecx
+ bswap edx
+ push eax
+ push ebx
+ push ecx
+ push edx
+ mov eax,DWORD [32+edi]
+ mov ebx,DWORD [36+edi]
+ mov ecx,DWORD [40+edi]
+ mov edx,DWORD [44+edi]
+ bswap eax
+ bswap ebx
+ bswap ecx
+ bswap edx
+ push eax
+ push ebx
+ push ecx
+ push edx
+ mov eax,DWORD [48+edi]
+ mov ebx,DWORD [52+edi]
+ mov ecx,DWORD [56+edi]
+ mov edx,DWORD [60+edi]
+ bswap eax
+ bswap ebx
+ bswap ecx
+ bswap edx
+ push eax
+ push ebx
+ push ecx
+ push edx
+ mov eax,DWORD [64+edi]
+ mov ebx,DWORD [68+edi]
+ mov ecx,DWORD [72+edi]
+ mov edx,DWORD [76+edi]
+ bswap eax
+ bswap ebx
+ bswap ecx
+ bswap edx
+ push eax
+ push ebx
+ push ecx
+ push edx
+ mov eax,DWORD [80+edi]
+ mov ebx,DWORD [84+edi]
+ mov ecx,DWORD [88+edi]
+ mov edx,DWORD [92+edi]
+ bswap eax
+ bswap ebx
+ bswap ecx
+ bswap edx
+ push eax
+ push ebx
+ push ecx
+ push edx
+ mov eax,DWORD [96+edi]
+ mov ebx,DWORD [100+edi]
+ mov ecx,DWORD [104+edi]
+ mov edx,DWORD [108+edi]
+ bswap eax
+ bswap ebx
+ bswap ecx
+ bswap edx
+ push eax
+ push ebx
+ push ecx
+ push edx
+ mov eax,DWORD [112+edi]
+ mov ebx,DWORD [116+edi]
+ mov ecx,DWORD [120+edi]
+ mov edx,DWORD [124+edi]
+ bswap eax
+ bswap ebx
+ bswap ecx
+ bswap edx
+ push eax
+ push ebx
+ push ecx
+ push edx
+ add edi,128
+ sub esp,72
+ mov DWORD [204+esp],edi
+ lea edi,[8+esp]
+ mov ecx,16
+dd 2784229001
+align 16
+L$00900_15_x86:
+ mov ecx,DWORD [40+esp]
+ mov edx,DWORD [44+esp]
+ mov esi,ecx
+ shr ecx,9
+ mov edi,edx
+ shr edx,9
+ mov ebx,ecx
+ shl esi,14
+ mov eax,edx
+ shl edi,14
+ xor ebx,esi
+ shr ecx,5
+ xor eax,edi
+ shr edx,5
+ xor eax,ecx
+ shl esi,4
+ xor ebx,edx
+ shl edi,4
+ xor ebx,esi
+ shr ecx,4
+ xor eax,edi
+ shr edx,4
+ xor eax,ecx
+ shl esi,5
+ xor ebx,edx
+ shl edi,5
+ xor eax,esi
+ xor ebx,edi
+ mov ecx,DWORD [48+esp]
+ mov edx,DWORD [52+esp]
+ mov esi,DWORD [56+esp]
+ mov edi,DWORD [60+esp]
+ add eax,DWORD [64+esp]
+ adc ebx,DWORD [68+esp]
+ xor ecx,esi
+ xor edx,edi
+ and ecx,DWORD [40+esp]
+ and edx,DWORD [44+esp]
+ add eax,DWORD [192+esp]
+ adc ebx,DWORD [196+esp]
+ xor ecx,esi
+ xor edx,edi
+ mov esi,DWORD [ebp]
+ mov edi,DWORD [4+ebp]
+ add eax,ecx
+ adc ebx,edx
+ mov ecx,DWORD [32+esp]
+ mov edx,DWORD [36+esp]
+ add eax,esi
+ adc ebx,edi
+ mov DWORD [esp],eax
+ mov DWORD [4+esp],ebx
+ add eax,ecx
+ adc ebx,edx
+ mov ecx,DWORD [8+esp]
+ mov edx,DWORD [12+esp]
+ mov DWORD [32+esp],eax
+ mov DWORD [36+esp],ebx
+ mov esi,ecx
+ shr ecx,2
+ mov edi,edx
+ shr edx,2
+ mov ebx,ecx
+ shl esi,4
+ mov eax,edx
+ shl edi,4
+ xor ebx,esi
+ shr ecx,5
+ xor eax,edi
+ shr edx,5
+ xor ebx,ecx
+ shl esi,21
+ xor eax,edx
+ shl edi,21
+ xor eax,esi
+ shr ecx,21
+ xor ebx,edi
+ shr edx,21
+ xor eax,ecx
+ shl esi,5
+ xor ebx,edx
+ shl edi,5
+ xor eax,esi
+ xor ebx,edi
+ mov ecx,DWORD [8+esp]
+ mov edx,DWORD [12+esp]
+ mov esi,DWORD [16+esp]
+ mov edi,DWORD [20+esp]
+ add eax,DWORD [esp]
+ adc ebx,DWORD [4+esp]
+ or ecx,esi
+ or edx,edi
+ and ecx,DWORD [24+esp]
+ and edx,DWORD [28+esp]
+ and esi,DWORD [8+esp]
+ and edi,DWORD [12+esp]
+ or ecx,esi
+ or edx,edi
+ add eax,ecx
+ adc ebx,edx
+ mov DWORD [esp],eax
+ mov DWORD [4+esp],ebx
+ mov dl,BYTE [ebp]
+ sub esp,8
+ lea ebp,[8+ebp]
+ cmp dl,148
+ jne NEAR L$00900_15_x86
+align 16
+L$01016_79_x86:
+ mov ecx,DWORD [312+esp]
+ mov edx,DWORD [316+esp]
+ mov esi,ecx
+ shr ecx,1
+ mov edi,edx
+ shr edx,1
+ mov eax,ecx
+ shl esi,24
+ mov ebx,edx
+ shl edi,24
+ xor ebx,esi
+ shr ecx,6
+ xor eax,edi
+ shr edx,6
+ xor eax,ecx
+ shl esi,7
+ xor ebx,edx
+ shl edi,1
+ xor ebx,esi
+ shr ecx,1
+ xor eax,edi
+ shr edx,1
+ xor eax,ecx
+ shl edi,6
+ xor ebx,edx
+ xor eax,edi
+ mov DWORD [esp],eax
+ mov DWORD [4+esp],ebx
+ mov ecx,DWORD [208+esp]
+ mov edx,DWORD [212+esp]
+ mov esi,ecx
+ shr ecx,6
+ mov edi,edx
+ shr edx,6
+ mov eax,ecx
+ shl esi,3
+ mov ebx,edx
+ shl edi,3
+ xor eax,esi
+ shr ecx,13
+ xor ebx,edi
+ shr edx,13
+ xor eax,ecx
+ shl esi,10
+ xor ebx,edx
+ shl edi,10
+ xor ebx,esi
+ shr ecx,10
+ xor eax,edi
+ shr edx,10
+ xor ebx,ecx
+ shl edi,13
+ xor eax,edx
+ xor eax,edi
+ mov ecx,DWORD [320+esp]
+ mov edx,DWORD [324+esp]
+ add eax,DWORD [esp]
+ adc ebx,DWORD [4+esp]
+ mov esi,DWORD [248+esp]
+ mov edi,DWORD [252+esp]
+ add eax,ecx
+ adc ebx,edx
+ add eax,esi
+ adc ebx,edi
+ mov DWORD [192+esp],eax
+ mov DWORD [196+esp],ebx
+ mov ecx,DWORD [40+esp]
+ mov edx,DWORD [44+esp]
+ mov esi,ecx
+ shr ecx,9
+ mov edi,edx
+ shr edx,9
+ mov ebx,ecx
+ shl esi,14
+ mov eax,edx
+ shl edi,14
+ xor ebx,esi
+ shr ecx,5
+ xor eax,edi
+ shr edx,5
+ xor eax,ecx
+ shl esi,4
+ xor ebx,edx
+ shl edi,4
+ xor ebx,esi
+ shr ecx,4
+ xor eax,edi
+ shr edx,4
+ xor eax,ecx
+ shl esi,5
+ xor ebx,edx
+ shl edi,5
+ xor eax,esi
+ xor ebx,edi
+ mov ecx,DWORD [48+esp]
+ mov edx,DWORD [52+esp]
+ mov esi,DWORD [56+esp]
+ mov edi,DWORD [60+esp]
+ add eax,DWORD [64+esp]
+ adc ebx,DWORD [68+esp]
+ xor ecx,esi
+ xor edx,edi
+ and ecx,DWORD [40+esp]
+ and edx,DWORD [44+esp]
+ add eax,DWORD [192+esp]
+ adc ebx,DWORD [196+esp]
+ xor ecx,esi
+ xor edx,edi
+ mov esi,DWORD [ebp]
+ mov edi,DWORD [4+ebp]
+ add eax,ecx
+ adc ebx,edx
+ mov ecx,DWORD [32+esp]
+ mov edx,DWORD [36+esp]
+ add eax,esi
+ adc ebx,edi
+ mov DWORD [esp],eax
+ mov DWORD [4+esp],ebx
+ add eax,ecx
+ adc ebx,edx
+ mov ecx,DWORD [8+esp]
+ mov edx,DWORD [12+esp]
+ mov DWORD [32+esp],eax
+ mov DWORD [36+esp],ebx
+ mov esi,ecx
+ shr ecx,2
+ mov edi,edx
+ shr edx,2
+ mov ebx,ecx
+ shl esi,4
+ mov eax,edx
+ shl edi,4
+ xor ebx,esi
+ shr ecx,5
+ xor eax,edi
+ shr edx,5
+ xor ebx,ecx
+ shl esi,21
+ xor eax,edx
+ shl edi,21
+ xor eax,esi
+ shr ecx,21
+ xor ebx,edi
+ shr edx,21
+ xor eax,ecx
+ shl esi,5
+ xor ebx,edx
+ shl edi,5
+ xor eax,esi
+ xor ebx,edi
+ mov ecx,DWORD [8+esp]
+ mov edx,DWORD [12+esp]
+ mov esi,DWORD [16+esp]
+ mov edi,DWORD [20+esp]
+ add eax,DWORD [esp]
+ adc ebx,DWORD [4+esp]
+ or ecx,esi
+ or edx,edi
+ and ecx,DWORD [24+esp]
+ and edx,DWORD [28+esp]
+ and esi,DWORD [8+esp]
+ and edi,DWORD [12+esp]
+ or ecx,esi
+ or edx,edi
+ add eax,ecx
+ adc ebx,edx
+ mov DWORD [esp],eax
+ mov DWORD [4+esp],ebx
+ mov dl,BYTE [ebp]
+ sub esp,8
+ lea ebp,[8+ebp]
+ cmp dl,23
+ jne NEAR L$01016_79_x86
+ mov esi,DWORD [840+esp]
+ mov edi,DWORD [844+esp]
+ mov eax,DWORD [esi]
+ mov ebx,DWORD [4+esi]
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [12+esi]
+ add eax,DWORD [8+esp]
+ adc ebx,DWORD [12+esp]
+ mov DWORD [esi],eax
+ mov DWORD [4+esi],ebx
+ add ecx,DWORD [16+esp]
+ adc edx,DWORD [20+esp]
+ mov DWORD [8+esi],ecx
+ mov DWORD [12+esi],edx
+ mov eax,DWORD [16+esi]
+ mov ebx,DWORD [20+esi]
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [28+esi]
+ add eax,DWORD [24+esp]
+ adc ebx,DWORD [28+esp]
+ mov DWORD [16+esi],eax
+ mov DWORD [20+esi],ebx
+ add ecx,DWORD [32+esp]
+ adc edx,DWORD [36+esp]
+ mov DWORD [24+esi],ecx
+ mov DWORD [28+esi],edx
+ mov eax,DWORD [32+esi]
+ mov ebx,DWORD [36+esi]
+ mov ecx,DWORD [40+esi]
+ mov edx,DWORD [44+esi]
+ add eax,DWORD [40+esp]
+ adc ebx,DWORD [44+esp]
+ mov DWORD [32+esi],eax
+ mov DWORD [36+esi],ebx
+ add ecx,DWORD [48+esp]
+ adc edx,DWORD [52+esp]
+ mov DWORD [40+esi],ecx
+ mov DWORD [44+esi],edx
+ mov eax,DWORD [48+esi]
+ mov ebx,DWORD [52+esi]
+ mov ecx,DWORD [56+esi]
+ mov edx,DWORD [60+esi]
+ add eax,DWORD [56+esp]
+ adc ebx,DWORD [60+esp]
+ mov DWORD [48+esi],eax
+ mov DWORD [52+esi],ebx
+ add ecx,DWORD [64+esp]
+ adc edx,DWORD [68+esp]
+ mov DWORD [56+esi],ecx
+ mov DWORD [60+esi],edx
+ add esp,840
+ sub ebp,640
+ cmp edi,DWORD [8+esp]
+ jb NEAR L$002loop_x86
+ mov esp,DWORD [12+esp]
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+align 64
+L$001K512:
+dd 3609767458,1116352408
+dd 602891725,1899447441
+dd 3964484399,3049323471
+dd 2173295548,3921009573
+dd 4081628472,961987163
+dd 3053834265,1508970993
+dd 2937671579,2453635748
+dd 3664609560,2870763221
+dd 2734883394,3624381080
+dd 1164996542,310598401
+dd 1323610764,607225278
+dd 3590304994,1426881987
+dd 4068182383,1925078388
+dd 991336113,2162078206
+dd 633803317,2614888103
+dd 3479774868,3248222580
+dd 2666613458,3835390401
+dd 944711139,4022224774
+dd 2341262773,264347078
+dd 2007800933,604807628
+dd 1495990901,770255983
+dd 1856431235,1249150122
+dd 3175218132,1555081692
+dd 2198950837,1996064986
+dd 3999719339,2554220882
+dd 766784016,2821834349
+dd 2566594879,2952996808
+dd 3203337956,3210313671
+dd 1034457026,3336571891
+dd 2466948901,3584528711
+dd 3758326383,113926993
+dd 168717936,338241895
+dd 1188179964,666307205
+dd 1546045734,773529912
+dd 1522805485,1294757372
+dd 2643833823,1396182291
+dd 2343527390,1695183700
+dd 1014477480,1986661051
+dd 1206759142,2177026350
+dd 344077627,2456956037
+dd 1290863460,2730485921
+dd 3158454273,2820302411
+dd 3505952657,3259730800
+dd 106217008,3345764771
+dd 3606008344,3516065817
+dd 1432725776,3600352804
+dd 1467031594,4094571909
+dd 851169720,275423344
+dd 3100823752,430227734
+dd 1363258195,506948616
+dd 3750685593,659060556
+dd 3785050280,883997877
+dd 3318307427,958139571
+dd 3812723403,1322822218
+dd 2003034995,1537002063
+dd 3602036899,1747873779
+dd 1575990012,1955562222
+dd 1125592928,2024104815
+dd 2716904306,2227730452
+dd 442776044,2361852424
+dd 593698344,2428436474
+dd 3733110249,2756734187
+dd 2999351573,3204031479
+dd 3815920427,3329325298
+dd 3928383900,3391569614
+dd 566280711,3515267271
+dd 3454069534,3940187606
+dd 4000239992,4118630271
+dd 1914138554,116418474
+dd 2731055270,174292421
+dd 3203993006,289380356
+dd 320620315,460393269
+dd 587496836,685471733
+dd 1086792851,852142971
+dd 365543100,1017036298
+dd 2618297676,1126000580
+dd 3409855158,1288033470
+dd 4234509866,1501505948
+dd 987167468,1607167915
+dd 1246189591,1816402316
+dd 67438087,66051
+dd 202182159,134810123
+db 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
+db 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+db 62,0
+segment .bss
+common _OPENSSL_ia32cap_P 16