author    Adam Langley <agl@google.com>    2015-05-11 17:20:37 -0700
committer Kenny Root <kroot@google.com>    2015-05-12 23:06:14 +0000
commit e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5 (patch)
tree   6e43e34595ecf887c26c32b86d8ab097fe8cac64 /linux-x86/crypto/sha
parent b3106a0cc1493bbe0505c0ec0ce3da4ca90a29ae (diff)
external/boringssl: bump revision.
This change bumps the BoringSSL revision to the current tip-of-tree.

Change-Id: I91d5bf467e16e8d86cb19a4de873985f524e5faa
Diffstat (limited to 'linux-x86/crypto/sha')
-rw-r--r--  linux-x86/crypto/sha/sha1-586.S    1422
-rw-r--r--  linux-x86/crypto/sha/sha256-586.S  1643
-rw-r--r--  linux-x86/crypto/sha/sha512-586.S  2271
3 files changed, 5122 insertions, 214 deletions
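Note: the new prologue added to sha1_block_data_order in the first hunk below selects an implementation at run time from the OPENSSL_ia32cap_P capability words: it falls back to the scalar .L001x86 path unless the SSSE3 and FXSR bits are set, and within the vectorized paths it prefers the SHA-extension code (.Lshaext_shortcut) over SSSE3 (.Lssse3_shortcut). Below is a minimal C sketch of that dispatch, not the actual BoringSSL source; the helper names are hypothetical, and the bit meanings are inferred from the masks visible in the diff (512 = SSSE3, 16777216 = FXSR, 536870912 = SHA extensions).

#include <stddef.h>
#include <stdint.h>

extern uint32_t OPENSSL_ia32cap_P[4];   /* CPU capability words populated at startup */

/* Hypothetical names for the three code paths added/kept by the diff below. */
void sha1_block_x86(uint32_t *state, const uint8_t *in, size_t blocks);
void sha1_block_ssse3(uint32_t *state, const uint8_t *in, size_t blocks);
void sha1_block_shaext(uint32_t *state, const uint8_t *in, size_t blocks);

void sha1_block_dispatch(uint32_t *state, const uint8_t *in, size_t blocks) {
    /* testl $512,%edx      -> SSSE3 bit (bit 9) of word 1
     * testl $16777216,%eax -> FXSR/XMM bit (bit 24) of word 0 */
    if ((OPENSSL_ia32cap_P[1] & (1u << 9)) &&
        (OPENSSL_ia32cap_P[0] & (1u << 24))) {
        /* testl $536870912,%ecx -> SHA-extension bit (bit 29) of word 2 */
        if (OPENSSL_ia32cap_P[2] & (1u << 29))
            sha1_block_shaext(state, in, blocks);   /* .Lshaext_shortcut */
        else
            sha1_block_ssse3(state, in, blocks);    /* .Lssse3_shortcut  */
        return;
    }
    sha1_block_x86(state, in, blocks);              /* scalar .L001x86 path */
}

The sha256-586.S hunk further down adds the same kind of capability check before branching to its .L004shaext and .L005SSSE3 bodies.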
diff --git a/linux-x86/crypto/sha/sha1-586.S b/linux-x86/crypto/sha/sha1-586.S
index 71ae5b3..808ccac 100644
--- a/linux-x86/crypto/sha/sha1-586.S
+++ b/linux-x86/crypto/sha/sha1-586.S
@@ -11,6 +11,23 @@ sha1_block_data_order:
pushl %ebx
pushl %esi
pushl %edi
+ call .L000pic_point
+.L000pic_point:
+ popl %ebp
+ leal OPENSSL_ia32cap_P-.L000pic_point(%ebp),%esi
+ leal .LK_XX_XX-.L000pic_point(%ebp),%ebp
+ movl (%esi),%eax
+ movl 4(%esi),%edx
+ testl $512,%edx
+ jz .L001x86
+ movl 8(%esi),%ecx
+ testl $16777216,%eax
+ jz .L001x86
+ testl $536870912,%ecx
+ jnz .Lshaext_shortcut
+ jmp .Lssse3_shortcut
+.align 16
+.L001x86:
movl 20(%esp),%ebp
movl 24(%esp),%esi
movl 28(%esp),%eax
@@ -19,9 +36,9 @@ sha1_block_data_order:
addl %esi,%eax
movl %eax,104(%esp)
movl 16(%ebp),%edi
- jmp .L000loop
+ jmp .L002loop
.align 16
-.L000loop:
+.L002loop:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
@@ -1368,7 +1385,7 @@ sha1_block_data_order:
movl %ebx,12(%ebp)
movl %edx,%esi
movl %ecx,16(%ebp)
- jb .L000loop
+ jb .L002loop
addl $76,%esp
popl %edi
popl %esi
@@ -1376,6 +1393,1405 @@ sha1_block_data_order:
popl %ebp
ret
.size sha1_block_data_order,.-.L_sha1_block_data_order_begin
+.hidden _sha1_block_data_order_shaext
+.type _sha1_block_data_order_shaext,@function
+.align 16
+_sha1_block_data_order_shaext:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ call .L003pic_point
+.L003pic_point:
+ popl %ebp
+ leal .LK_XX_XX-.L003pic_point(%ebp),%ebp
+.Lshaext_shortcut:
+ movl 20(%esp),%edi
+ movl %esp,%ebx
+ movl 24(%esp),%esi
+ movl 28(%esp),%ecx
+ subl $32,%esp
+ movdqu (%edi),%xmm0
+ movd 16(%edi),%xmm1
+ andl $-32,%esp
+ movdqa 80(%ebp),%xmm3
+ movdqu (%esi),%xmm4
+ pshufd $27,%xmm0,%xmm0
+ movdqu 16(%esi),%xmm5
+ pshufd $27,%xmm1,%xmm1
+ movdqu 32(%esi),%xmm6
+.byte 102,15,56,0,227
+ movdqu 48(%esi),%xmm7
+.byte 102,15,56,0,235
+.byte 102,15,56,0,243
+.byte 102,15,56,0,251
+ jmp .L004loop_shaext
+.align 16
+.L004loop_shaext:
+ decl %ecx
+ leal 64(%esi),%eax
+ movdqa %xmm1,(%esp)
+ paddd %xmm4,%xmm1
+ cmovnel %eax,%esi
+ movdqa %xmm0,16(%esp)
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,0
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,0
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,0
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,0
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,0
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,1
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,1
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,1
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,1
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,1
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,2
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,2
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,2
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,2
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,2
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,3
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+ movdqu (%esi),%xmm4
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,3
+.byte 15,56,200,213
+ movdqu 16(%esi),%xmm5
+.byte 102,15,56,0,227
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,3
+.byte 15,56,200,206
+ movdqu 32(%esi),%xmm6
+.byte 102,15,56,0,235
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,3
+.byte 15,56,200,215
+ movdqu 48(%esi),%xmm7
+.byte 102,15,56,0,243
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,3
+ movdqa (%esp),%xmm2
+.byte 102,15,56,0,251
+.byte 15,56,200,202
+ paddd 16(%esp),%xmm0
+ jnz .L004loop_shaext
+ pshufd $27,%xmm0,%xmm0
+ pshufd $27,%xmm1,%xmm1
+ movdqu %xmm0,(%edi)
+ movd %xmm1,16(%edi)
+ movl %ebx,%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size _sha1_block_data_order_shaext,.-_sha1_block_data_order_shaext
+.hidden _sha1_block_data_order_ssse3
+.type _sha1_block_data_order_ssse3,@function
+.align 16
+_sha1_block_data_order_ssse3:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ call .L005pic_point
+.L005pic_point:
+ popl %ebp
+ leal .LK_XX_XX-.L005pic_point(%ebp),%ebp
+.Lssse3_shortcut:
+ movdqa (%ebp),%xmm7
+ movdqa 16(%ebp),%xmm0
+ movdqa 32(%ebp),%xmm1
+ movdqa 48(%ebp),%xmm2
+ movdqa 64(%ebp),%xmm6
+ movl 20(%esp),%edi
+ movl 24(%esp),%ebp
+ movl 28(%esp),%edx
+ movl %esp,%esi
+ subl $208,%esp
+ andl $-64,%esp
+ movdqa %xmm0,112(%esp)
+ movdqa %xmm1,128(%esp)
+ movdqa %xmm2,144(%esp)
+ shll $6,%edx
+ movdqa %xmm7,160(%esp)
+ addl %ebp,%edx
+ movdqa %xmm6,176(%esp)
+ addl $64,%ebp
+ movl %edi,192(%esp)
+ movl %ebp,196(%esp)
+ movl %edx,200(%esp)
+ movl %esi,204(%esp)
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ movl 8(%edi),%ecx
+ movl 12(%edi),%edx
+ movl 16(%edi),%edi
+ movl %ebx,%esi
+ movdqu -64(%ebp),%xmm0
+ movdqu -48(%ebp),%xmm1
+ movdqu -32(%ebp),%xmm2
+ movdqu -16(%ebp),%xmm3
+.byte 102,15,56,0,198
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+ movdqa %xmm7,96(%esp)
+.byte 102,15,56,0,222
+ paddd %xmm7,%xmm0
+ paddd %xmm7,%xmm1
+ paddd %xmm7,%xmm2
+ movdqa %xmm0,(%esp)
+ psubd %xmm7,%xmm0
+ movdqa %xmm1,16(%esp)
+ psubd %xmm7,%xmm1
+ movdqa %xmm2,32(%esp)
+ movl %ecx,%ebp
+ psubd %xmm7,%xmm2
+ xorl %edx,%ebp
+ pshufd $238,%xmm0,%xmm4
+ andl %ebp,%esi
+ jmp .L006loop
+.align 16
+.L006loop:
+ rorl $2,%ebx
+ xorl %edx,%esi
+ movl %eax,%ebp
+ punpcklqdq %xmm1,%xmm4
+ movdqa %xmm3,%xmm6
+ addl (%esp),%edi
+ xorl %ecx,%ebx
+ paddd %xmm3,%xmm7
+ movdqa %xmm0,64(%esp)
+ roll $5,%eax
+ addl %esi,%edi
+ psrldq $4,%xmm6
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
+ pxor %xmm0,%xmm4
+ addl %eax,%edi
+ rorl $7,%eax
+ pxor %xmm2,%xmm6
+ xorl %ecx,%ebp
+ movl %edi,%esi
+ addl 4(%esp),%edx
+ pxor %xmm6,%xmm4
+ xorl %ebx,%eax
+ roll $5,%edi
+ movdqa %xmm7,48(%esp)
+ addl %ebp,%edx
+ andl %eax,%esi
+ movdqa %xmm4,%xmm0
+ xorl %ebx,%eax
+ addl %edi,%edx
+ rorl $7,%edi
+ movdqa %xmm4,%xmm6
+ xorl %ebx,%esi
+ pslldq $12,%xmm0
+ paddd %xmm4,%xmm4
+ movl %edx,%ebp
+ addl 8(%esp),%ecx
+ psrld $31,%xmm6
+ xorl %eax,%edi
+ roll $5,%edx
+ movdqa %xmm0,%xmm7
+ addl %esi,%ecx
+ andl %edi,%ebp
+ xorl %eax,%edi
+ psrld $30,%xmm0
+ addl %edx,%ecx
+ rorl $7,%edx
+ por %xmm6,%xmm4
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ addl 12(%esp),%ebx
+ pslld $2,%xmm7
+ xorl %edi,%edx
+ roll $5,%ecx
+ pxor %xmm0,%xmm4
+ movdqa 96(%esp),%xmm0
+ addl %ebp,%ebx
+ andl %edx,%esi
+ pxor %xmm7,%xmm4
+ pshufd $238,%xmm1,%xmm5
+ xorl %edi,%edx
+ addl %ecx,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ movl %ebx,%ebp
+ punpcklqdq %xmm2,%xmm5
+ movdqa %xmm4,%xmm7
+ addl 16(%esp),%eax
+ xorl %edx,%ecx
+ paddd %xmm4,%xmm0
+ movdqa %xmm1,80(%esp)
+ roll $5,%ebx
+ addl %esi,%eax
+ psrldq $4,%xmm7
+ andl %ecx,%ebp
+ xorl %edx,%ecx
+ pxor %xmm1,%xmm5
+ addl %ebx,%eax
+ rorl $7,%ebx
+ pxor %xmm3,%xmm7
+ xorl %edx,%ebp
+ movl %eax,%esi
+ addl 20(%esp),%edi
+ pxor %xmm7,%xmm5
+ xorl %ecx,%ebx
+ roll $5,%eax
+ movdqa %xmm0,(%esp)
+ addl %ebp,%edi
+ andl %ebx,%esi
+ movdqa %xmm5,%xmm1
+ xorl %ecx,%ebx
+ addl %eax,%edi
+ rorl $7,%eax
+ movdqa %xmm5,%xmm7
+ xorl %ecx,%esi
+ pslldq $12,%xmm1
+ paddd %xmm5,%xmm5
+ movl %edi,%ebp
+ addl 24(%esp),%edx
+ psrld $31,%xmm7
+ xorl %ebx,%eax
+ roll $5,%edi
+ movdqa %xmm1,%xmm0
+ addl %esi,%edx
+ andl %eax,%ebp
+ xorl %ebx,%eax
+ psrld $30,%xmm1
+ addl %edi,%edx
+ rorl $7,%edi
+ por %xmm7,%xmm5
+ xorl %ebx,%ebp
+ movl %edx,%esi
+ addl 28(%esp),%ecx
+ pslld $2,%xmm0
+ xorl %eax,%edi
+ roll $5,%edx
+ pxor %xmm1,%xmm5
+ movdqa 112(%esp),%xmm1
+ addl %ebp,%ecx
+ andl %edi,%esi
+ pxor %xmm0,%xmm5
+ pshufd $238,%xmm2,%xmm6
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%edx
+ xorl %eax,%esi
+ movl %ecx,%ebp
+ punpcklqdq %xmm3,%xmm6
+ movdqa %xmm5,%xmm0
+ addl 32(%esp),%ebx
+ xorl %edi,%edx
+ paddd %xmm5,%xmm1
+ movdqa %xmm2,96(%esp)
+ roll $5,%ecx
+ addl %esi,%ebx
+ psrldq $4,%xmm0
+ andl %edx,%ebp
+ xorl %edi,%edx
+ pxor %xmm2,%xmm6
+ addl %ecx,%ebx
+ rorl $7,%ecx
+ pxor %xmm4,%xmm0
+ xorl %edi,%ebp
+ movl %ebx,%esi
+ addl 36(%esp),%eax
+ pxor %xmm0,%xmm6
+ xorl %edx,%ecx
+ roll $5,%ebx
+ movdqa %xmm1,16(%esp)
+ addl %ebp,%eax
+ andl %ecx,%esi
+ movdqa %xmm6,%xmm2
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ rorl $7,%ebx
+ movdqa %xmm6,%xmm0
+ xorl %edx,%esi
+ pslldq $12,%xmm2
+ paddd %xmm6,%xmm6
+ movl %eax,%ebp
+ addl 40(%esp),%edi
+ psrld $31,%xmm0
+ xorl %ecx,%ebx
+ roll $5,%eax
+ movdqa %xmm2,%xmm1
+ addl %esi,%edi
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
+ psrld $30,%xmm2
+ addl %eax,%edi
+ rorl $7,%eax
+ por %xmm0,%xmm6
+ xorl %ecx,%ebp
+ movdqa 64(%esp),%xmm0
+ movl %edi,%esi
+ addl 44(%esp),%edx
+ pslld $2,%xmm1
+ xorl %ebx,%eax
+ roll $5,%edi
+ pxor %xmm2,%xmm6
+ movdqa 112(%esp),%xmm2
+ addl %ebp,%edx
+ andl %eax,%esi
+ pxor %xmm1,%xmm6
+ pshufd $238,%xmm3,%xmm7
+ xorl %ebx,%eax
+ addl %edi,%edx
+ rorl $7,%edi
+ xorl %ebx,%esi
+ movl %edx,%ebp
+ punpcklqdq %xmm4,%xmm7
+ movdqa %xmm6,%xmm1
+ addl 48(%esp),%ecx
+ xorl %eax,%edi
+ paddd %xmm6,%xmm2
+ movdqa %xmm3,64(%esp)
+ roll $5,%edx
+ addl %esi,%ecx
+ psrldq $4,%xmm1
+ andl %edi,%ebp
+ xorl %eax,%edi
+ pxor %xmm3,%xmm7
+ addl %edx,%ecx
+ rorl $7,%edx
+ pxor %xmm5,%xmm1
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ addl 52(%esp),%ebx
+ pxor %xmm1,%xmm7
+ xorl %edi,%edx
+ roll $5,%ecx
+ movdqa %xmm2,32(%esp)
+ addl %ebp,%ebx
+ andl %edx,%esi
+ movdqa %xmm7,%xmm3
+ xorl %edi,%edx
+ addl %ecx,%ebx
+ rorl $7,%ecx
+ movdqa %xmm7,%xmm1
+ xorl %edi,%esi
+ pslldq $12,%xmm3
+ paddd %xmm7,%xmm7
+ movl %ebx,%ebp
+ addl 56(%esp),%eax
+ psrld $31,%xmm1
+ xorl %edx,%ecx
+ roll $5,%ebx
+ movdqa %xmm3,%xmm2
+ addl %esi,%eax
+ andl %ecx,%ebp
+ xorl %edx,%ecx
+ psrld $30,%xmm3
+ addl %ebx,%eax
+ rorl $7,%ebx
+ por %xmm1,%xmm7
+ xorl %edx,%ebp
+ movdqa 80(%esp),%xmm1
+ movl %eax,%esi
+ addl 60(%esp),%edi
+ pslld $2,%xmm2
+ xorl %ecx,%ebx
+ roll $5,%eax
+ pxor %xmm3,%xmm7
+ movdqa 112(%esp),%xmm3
+ addl %ebp,%edi
+ andl %ebx,%esi
+ pxor %xmm2,%xmm7
+ pshufd $238,%xmm6,%xmm2
+ xorl %ecx,%ebx
+ addl %eax,%edi
+ rorl $7,%eax
+ pxor %xmm4,%xmm0
+ punpcklqdq %xmm7,%xmm2
+ xorl %ecx,%esi
+ movl %edi,%ebp
+ addl (%esp),%edx
+ pxor %xmm1,%xmm0
+ movdqa %xmm4,80(%esp)
+ xorl %ebx,%eax
+ roll $5,%edi
+ movdqa %xmm3,%xmm4
+ addl %esi,%edx
+ paddd %xmm7,%xmm3
+ andl %eax,%ebp
+ pxor %xmm2,%xmm0
+ xorl %ebx,%eax
+ addl %edi,%edx
+ rorl $7,%edi
+ xorl %ebx,%ebp
+ movdqa %xmm0,%xmm2
+ movdqa %xmm3,48(%esp)
+ movl %edx,%esi
+ addl 4(%esp),%ecx
+ xorl %eax,%edi
+ roll $5,%edx
+ pslld $2,%xmm0
+ addl %ebp,%ecx
+ andl %edi,%esi
+ psrld $30,%xmm2
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%edx
+ xorl %eax,%esi
+ movl %ecx,%ebp
+ addl 8(%esp),%ebx
+ xorl %edi,%edx
+ roll $5,%ecx
+ por %xmm2,%xmm0
+ addl %esi,%ebx
+ andl %edx,%ebp
+ movdqa 96(%esp),%xmm2
+ xorl %edi,%edx
+ addl %ecx,%ebx
+ addl 12(%esp),%eax
+ xorl %edi,%ebp
+ movl %ebx,%esi
+ pshufd $238,%xmm7,%xmm3
+ roll $5,%ebx
+ addl %ebp,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 16(%esp),%edi
+ pxor %xmm5,%xmm1
+ punpcklqdq %xmm0,%xmm3
+ xorl %ecx,%esi
+ movl %eax,%ebp
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ movdqa %xmm5,96(%esp)
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ movdqa %xmm4,%xmm5
+ rorl $7,%ebx
+ paddd %xmm0,%xmm4
+ addl %eax,%edi
+ pxor %xmm3,%xmm1
+ addl 20(%esp),%edx
+ xorl %ebx,%ebp
+ movl %edi,%esi
+ roll $5,%edi
+ movdqa %xmm1,%xmm3
+ movdqa %xmm4,(%esp)
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%esp),%ecx
+ xorl %eax,%esi
+ psrld $30,%xmm3
+ movl %edx,%ebp
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ rorl $7,%edi
+ addl %edx,%ecx
+ por %xmm3,%xmm1
+ addl 28(%esp),%ebx
+ xorl %edi,%ebp
+ movdqa 64(%esp),%xmm3
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
+ pshufd $238,%xmm0,%xmm4
+ addl %ecx,%ebx
+ addl 32(%esp),%eax
+ pxor %xmm6,%xmm2
+ punpcklqdq %xmm1,%xmm4
+ xorl %edx,%esi
+ movl %ebx,%ebp
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ movdqa %xmm6,64(%esp)
+ addl %esi,%eax
+ xorl %edx,%ebp
+ movdqa 128(%esp),%xmm6
+ rorl $7,%ecx
+ paddd %xmm1,%xmm5
+ addl %ebx,%eax
+ pxor %xmm4,%xmm2
+ addl 36(%esp),%edi
+ xorl %ecx,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm4
+ movdqa %xmm5,16(%esp)
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%edi
+ pslld $2,%xmm2
+ addl 40(%esp),%edx
+ xorl %ebx,%esi
+ psrld $30,%xmm4
+ movl %edi,%ebp
+ roll $5,%edi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ rorl $7,%eax
+ addl %edi,%edx
+ por %xmm4,%xmm2
+ addl 44(%esp),%ecx
+ xorl %eax,%ebp
+ movdqa 80(%esp),%xmm4
+ movl %edx,%esi
+ roll $5,%edx
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
+ pshufd $238,%xmm1,%xmm5
+ addl %edx,%ecx
+ addl 48(%esp),%ebx
+ pxor %xmm7,%xmm3
+ punpcklqdq %xmm2,%xmm5
+ xorl %edi,%esi
+ movl %ecx,%ebp
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ movdqa %xmm7,80(%esp)
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ movdqa %xmm6,%xmm7
+ rorl $7,%edx
+ paddd %xmm2,%xmm6
+ addl %ecx,%ebx
+ pxor %xmm5,%xmm3
+ addl 52(%esp),%eax
+ xorl %edx,%ebp
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm5
+ movdqa %xmm6,32(%esp)
+ addl %ebp,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ pslld $2,%xmm3
+ addl 56(%esp),%edi
+ xorl %ecx,%esi
+ psrld $30,%xmm5
+ movl %eax,%ebp
+ roll $5,%eax
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ rorl $7,%ebx
+ addl %eax,%edi
+ por %xmm5,%xmm3
+ addl 60(%esp),%edx
+ xorl %ebx,%ebp
+ movdqa 96(%esp),%xmm5
+ movl %edi,%esi
+ roll $5,%edi
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ pshufd $238,%xmm2,%xmm6
+ addl %edi,%edx
+ addl (%esp),%ecx
+ pxor %xmm0,%xmm4
+ punpcklqdq %xmm3,%xmm6
+ xorl %eax,%esi
+ movl %edx,%ebp
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ movdqa %xmm0,96(%esp)
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ movdqa %xmm7,%xmm0
+ rorl $7,%edi
+ paddd %xmm3,%xmm7
+ addl %edx,%ecx
+ pxor %xmm6,%xmm4
+ addl 4(%esp),%ebx
+ xorl %edi,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm6
+ movdqa %xmm7,48(%esp)
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ pslld $2,%xmm4
+ addl 8(%esp),%eax
+ xorl %edx,%esi
+ psrld $30,%xmm6
+ movl %ebx,%ebp
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edx,%ebp
+ rorl $7,%ecx
+ addl %ebx,%eax
+ por %xmm6,%xmm4
+ addl 12(%esp),%edi
+ xorl %ecx,%ebp
+ movdqa 64(%esp),%xmm6
+ movl %eax,%esi
+ roll $5,%eax
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ pshufd $238,%xmm3,%xmm7
+ addl %eax,%edi
+ addl 16(%esp),%edx
+ pxor %xmm1,%xmm5
+ punpcklqdq %xmm4,%xmm7
+ xorl %ebx,%esi
+ movl %edi,%ebp
+ roll $5,%edi
+ pxor %xmm6,%xmm5
+ movdqa %xmm1,64(%esp)
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ movdqa %xmm0,%xmm1
+ rorl $7,%eax
+ paddd %xmm4,%xmm0
+ addl %edi,%edx
+ pxor %xmm7,%xmm5
+ addl 20(%esp),%ecx
+ xorl %eax,%ebp
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm7
+ movdqa %xmm0,(%esp)
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
+ addl %edx,%ecx
+ pslld $2,%xmm5
+ addl 24(%esp),%ebx
+ xorl %edi,%esi
+ psrld $30,%xmm7
+ movl %ecx,%ebp
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ rorl $7,%edx
+ addl %ecx,%ebx
+ por %xmm7,%xmm5
+ addl 28(%esp),%eax
+ movdqa 80(%esp),%xmm7
+ rorl $7,%ecx
+ movl %ebx,%esi
+ xorl %edx,%ebp
+ roll $5,%ebx
+ pshufd $238,%xmm4,%xmm0
+ addl %ebp,%eax
+ xorl %ecx,%esi
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 32(%esp),%edi
+ pxor %xmm2,%xmm6
+ punpcklqdq %xmm5,%xmm0
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ rorl $7,%ebx
+ pxor %xmm7,%xmm6
+ movdqa %xmm2,80(%esp)
+ movl %eax,%ebp
+ xorl %ecx,%esi
+ roll $5,%eax
+ movdqa %xmm1,%xmm2
+ addl %esi,%edi
+ paddd %xmm5,%xmm1
+ xorl %ebx,%ebp
+ pxor %xmm0,%xmm6
+ xorl %ecx,%ebx
+ addl %eax,%edi
+ addl 36(%esp),%edx
+ andl %ebx,%ebp
+ movdqa %xmm6,%xmm0
+ movdqa %xmm1,16(%esp)
+ xorl %ecx,%ebx
+ rorl $7,%eax
+ movl %edi,%esi
+ xorl %ebx,%ebp
+ roll $5,%edi
+ pslld $2,%xmm6
+ addl %ebp,%edx
+ xorl %eax,%esi
+ psrld $30,%xmm0
+ xorl %ebx,%eax
+ addl %edi,%edx
+ addl 40(%esp),%ecx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ rorl $7,%edi
+ por %xmm0,%xmm6
+ movl %edx,%ebp
+ xorl %eax,%esi
+ movdqa 96(%esp),%xmm0
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %edi,%ebp
+ xorl %eax,%edi
+ addl %edx,%ecx
+ pshufd $238,%xmm5,%xmm1
+ addl 44(%esp),%ebx
+ andl %edi,%ebp
+ xorl %eax,%edi
+ rorl $7,%edx
+ movl %ecx,%esi
+ xorl %edi,%ebp
+ roll $5,%ecx
+ addl %ebp,%ebx
+ xorl %edx,%esi
+ xorl %edi,%edx
+ addl %ecx,%ebx
+ addl 48(%esp),%eax
+ pxor %xmm3,%xmm7
+ punpcklqdq %xmm6,%xmm1
+ andl %edx,%esi
+ xorl %edi,%edx
+ rorl $7,%ecx
+ pxor %xmm0,%xmm7
+ movdqa %xmm3,96(%esp)
+ movl %ebx,%ebp
+ xorl %edx,%esi
+ roll $5,%ebx
+ movdqa 144(%esp),%xmm3
+ addl %esi,%eax
+ paddd %xmm6,%xmm2
+ xorl %ecx,%ebp
+ pxor %xmm1,%xmm7
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 52(%esp),%edi
+ andl %ecx,%ebp
+ movdqa %xmm7,%xmm1
+ movdqa %xmm2,32(%esp)
+ xorl %edx,%ecx
+ rorl $7,%ebx
+ movl %eax,%esi
+ xorl %ecx,%ebp
+ roll $5,%eax
+ pslld $2,%xmm7
+ addl %ebp,%edi
+ xorl %ebx,%esi
+ psrld $30,%xmm1
+ xorl %ecx,%ebx
+ addl %eax,%edi
+ addl 56(%esp),%edx
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ rorl $7,%eax
+ por %xmm1,%xmm7
+ movl %edi,%ebp
+ xorl %ebx,%esi
+ movdqa 64(%esp),%xmm1
+ roll $5,%edi
+ addl %esi,%edx
+ xorl %eax,%ebp
+ xorl %ebx,%eax
+ addl %edi,%edx
+ pshufd $238,%xmm6,%xmm2
+ addl 60(%esp),%ecx
+ andl %eax,%ebp
+ xorl %ebx,%eax
+ rorl $7,%edi
+ movl %edx,%esi
+ xorl %eax,%ebp
+ roll $5,%edx
+ addl %ebp,%ecx
+ xorl %edi,%esi
+ xorl %eax,%edi
+ addl %edx,%ecx
+ addl (%esp),%ebx
+ pxor %xmm4,%xmm0
+ punpcklqdq %xmm7,%xmm2
+ andl %edi,%esi
+ xorl %eax,%edi
+ rorl $7,%edx
+ pxor %xmm1,%xmm0
+ movdqa %xmm4,64(%esp)
+ movl %ecx,%ebp
+ xorl %edi,%esi
+ roll $5,%ecx
+ movdqa %xmm3,%xmm4
+ addl %esi,%ebx
+ paddd %xmm7,%xmm3
+ xorl %edx,%ebp
+ pxor %xmm2,%xmm0
+ xorl %edi,%edx
+ addl %ecx,%ebx
+ addl 4(%esp),%eax
+ andl %edx,%ebp
+ movdqa %xmm0,%xmm2
+ movdqa %xmm3,48(%esp)
+ xorl %edi,%edx
+ rorl $7,%ecx
+ movl %ebx,%esi
+ xorl %edx,%ebp
+ roll $5,%ebx
+ pslld $2,%xmm0
+ addl %ebp,%eax
+ xorl %ecx,%esi
+ psrld $30,%xmm2
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 8(%esp),%edi
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ rorl $7,%ebx
+ por %xmm2,%xmm0
+ movl %eax,%ebp
+ xorl %ecx,%esi
+ movdqa 80(%esp),%xmm2
+ roll $5,%eax
+ addl %esi,%edi
+ xorl %ebx,%ebp
+ xorl %ecx,%ebx
+ addl %eax,%edi
+ pshufd $238,%xmm7,%xmm3
+ addl 12(%esp),%edx
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
+ rorl $7,%eax
+ movl %edi,%esi
+ xorl %ebx,%ebp
+ roll $5,%edi
+ addl %ebp,%edx
+ xorl %eax,%esi
+ xorl %ebx,%eax
+ addl %edi,%edx
+ addl 16(%esp),%ecx
+ pxor %xmm5,%xmm1
+ punpcklqdq %xmm0,%xmm3
+ andl %eax,%esi
+ xorl %ebx,%eax
+ rorl $7,%edi
+ pxor %xmm2,%xmm1
+ movdqa %xmm5,80(%esp)
+ movl %edx,%ebp
+ xorl %eax,%esi
+ roll $5,%edx
+ movdqa %xmm4,%xmm5
+ addl %esi,%ecx
+ paddd %xmm0,%xmm4
+ xorl %edi,%ebp
+ pxor %xmm3,%xmm1
+ xorl %eax,%edi
+ addl %edx,%ecx
+ addl 20(%esp),%ebx
+ andl %edi,%ebp
+ movdqa %xmm1,%xmm3
+ movdqa %xmm4,(%esp)
+ xorl %eax,%edi
+ rorl $7,%edx
+ movl %ecx,%esi
+ xorl %edi,%ebp
+ roll $5,%ecx
+ pslld $2,%xmm1
+ addl %ebp,%ebx
+ xorl %edx,%esi
+ psrld $30,%xmm3
+ xorl %edi,%edx
+ addl %ecx,%ebx
+ addl 24(%esp),%eax
+ andl %edx,%esi
+ xorl %edi,%edx
+ rorl $7,%ecx
+ por %xmm3,%xmm1
+ movl %ebx,%ebp
+ xorl %edx,%esi
+ movdqa 96(%esp),%xmm3
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ecx,%ebp
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ pshufd $238,%xmm0,%xmm4
+ addl 28(%esp),%edi
+ andl %ecx,%ebp
+ xorl %edx,%ecx
+ rorl $7,%ebx
+ movl %eax,%esi
+ xorl %ecx,%ebp
+ roll $5,%eax
+ addl %ebp,%edi
+ xorl %ebx,%esi
+ xorl %ecx,%ebx
+ addl %eax,%edi
+ addl 32(%esp),%edx
+ pxor %xmm6,%xmm2
+ punpcklqdq %xmm1,%xmm4
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ rorl $7,%eax
+ pxor %xmm3,%xmm2
+ movdqa %xmm6,96(%esp)
+ movl %edi,%ebp
+ xorl %ebx,%esi
+ roll $5,%edi
+ movdqa %xmm5,%xmm6
+ addl %esi,%edx
+ paddd %xmm1,%xmm5
+ xorl %eax,%ebp
+ pxor %xmm4,%xmm2
+ xorl %ebx,%eax
+ addl %edi,%edx
+ addl 36(%esp),%ecx
+ andl %eax,%ebp
+ movdqa %xmm2,%xmm4
+ movdqa %xmm5,16(%esp)
+ xorl %ebx,%eax
+ rorl $7,%edi
+ movl %edx,%esi
+ xorl %eax,%ebp
+ roll $5,%edx
+ pslld $2,%xmm2
+ addl %ebp,%ecx
+ xorl %edi,%esi
+ psrld $30,%xmm4
+ xorl %eax,%edi
+ addl %edx,%ecx
+ addl 40(%esp),%ebx
+ andl %edi,%esi
+ xorl %eax,%edi
+ rorl $7,%edx
+ por %xmm4,%xmm2
+ movl %ecx,%ebp
+ xorl %edi,%esi
+ movdqa 64(%esp),%xmm4
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %edx,%ebp
+ xorl %edi,%edx
+ addl %ecx,%ebx
+ pshufd $238,%xmm1,%xmm5
+ addl 44(%esp),%eax
+ andl %edx,%ebp
+ xorl %edi,%edx
+ rorl $7,%ecx
+ movl %ebx,%esi
+ xorl %edx,%ebp
+ roll $5,%ebx
+ addl %ebp,%eax
+ xorl %edx,%esi
+ addl %ebx,%eax
+ addl 48(%esp),%edi
+ pxor %xmm7,%xmm3
+ punpcklqdq %xmm2,%xmm5
+ xorl %ecx,%esi
+ movl %eax,%ebp
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ movdqa %xmm7,64(%esp)
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ movdqa %xmm6,%xmm7
+ rorl $7,%ebx
+ paddd %xmm2,%xmm6
+ addl %eax,%edi
+ pxor %xmm5,%xmm3
+ addl 52(%esp),%edx
+ xorl %ebx,%ebp
+ movl %edi,%esi
+ roll $5,%edi
+ movdqa %xmm3,%xmm5
+ movdqa %xmm6,32(%esp)
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%esp),%ecx
+ xorl %eax,%esi
+ psrld $30,%xmm5
+ movl %edx,%ebp
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ rorl $7,%edi
+ addl %edx,%ecx
+ por %xmm5,%xmm3
+ addl 60(%esp),%ebx
+ xorl %edi,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl (%esp),%eax
+ xorl %edx,%esi
+ movl %ebx,%ebp
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edx,%ebp
+ rorl $7,%ecx
+ paddd %xmm3,%xmm7
+ addl %ebx,%eax
+ addl 4(%esp),%edi
+ xorl %ecx,%ebp
+ movl %eax,%esi
+ movdqa %xmm7,48(%esp)
+ roll $5,%eax
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%edi
+ addl 8(%esp),%edx
+ xorl %ebx,%esi
+ movl %edi,%ebp
+ roll $5,%edi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 12(%esp),%ecx
+ xorl %eax,%ebp
+ movl %edx,%esi
+ roll $5,%edx
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
+ addl %edx,%ecx
+ movl 196(%esp),%ebp
+ cmpl 200(%esp),%ebp
+ je .L007done
+ movdqa 160(%esp),%xmm7
+ movdqa 176(%esp),%xmm6
+ movdqu (%ebp),%xmm0
+ movdqu 16(%ebp),%xmm1
+ movdqu 32(%ebp),%xmm2
+ movdqu 48(%ebp),%xmm3
+ addl $64,%ebp
+.byte 102,15,56,0,198
+ movl %ebp,196(%esp)
+ movdqa %xmm7,96(%esp)
+ addl 16(%esp),%ebx
+ xorl %edi,%esi
+ movl %ecx,%ebp
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ rorl $7,%edx
+.byte 102,15,56,0,206
+ addl %ecx,%ebx
+ addl 20(%esp),%eax
+ xorl %edx,%ebp
+ movl %ebx,%esi
+ paddd %xmm7,%xmm0
+ roll $5,%ebx
+ addl %ebp,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ movdqa %xmm0,(%esp)
+ addl %ebx,%eax
+ addl 24(%esp),%edi
+ xorl %ecx,%esi
+ movl %eax,%ebp
+ psubd %xmm7,%xmm0
+ roll $5,%eax
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ rorl $7,%ebx
+ addl %eax,%edi
+ addl 28(%esp),%edx
+ xorl %ebx,%ebp
+ movl %edi,%esi
+ roll $5,%edi
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%esp),%ecx
+ xorl %eax,%esi
+ movl %edx,%ebp
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ rorl $7,%edi
+.byte 102,15,56,0,214
+ addl %edx,%ecx
+ addl 36(%esp),%ebx
+ xorl %edi,%ebp
+ movl %ecx,%esi
+ paddd %xmm7,%xmm1
+ roll $5,%ecx
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
+ movdqa %xmm1,16(%esp)
+ addl %ecx,%ebx
+ addl 40(%esp),%eax
+ xorl %edx,%esi
+ movl %ebx,%ebp
+ psubd %xmm7,%xmm1
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edx,%ebp
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 44(%esp),%edi
+ xorl %ecx,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%edi
+ addl 48(%esp),%edx
+ xorl %ebx,%esi
+ movl %edi,%ebp
+ roll $5,%edi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ rorl $7,%eax
+.byte 102,15,56,0,222
+ addl %edi,%edx
+ addl 52(%esp),%ecx
+ xorl %eax,%ebp
+ movl %edx,%esi
+ paddd %xmm7,%xmm2
+ roll $5,%edx
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
+ movdqa %xmm2,32(%esp)
+ addl %edx,%ecx
+ addl 56(%esp),%ebx
+ xorl %edi,%esi
+ movl %ecx,%ebp
+ psubd %xmm7,%xmm2
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 60(%esp),%eax
+ xorl %edx,%ebp
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %ebp,%eax
+ rorl $7,%ecx
+ addl %ebx,%eax
+ movl 192(%esp),%ebp
+ addl (%ebp),%eax
+ addl 4(%ebp),%esi
+ addl 8(%ebp),%ecx
+ movl %eax,(%ebp)
+ addl 12(%ebp),%edx
+ movl %esi,4(%ebp)
+ addl 16(%ebp),%edi
+ movl %ecx,8(%ebp)
+ movl %ecx,%ebx
+ movl %edx,12(%ebp)
+ xorl %edx,%ebx
+ movl %edi,16(%ebp)
+ movl %esi,%ebp
+ pshufd $238,%xmm0,%xmm4
+ andl %ebx,%esi
+ movl %ebp,%ebx
+ jmp .L006loop
+.align 16
+.L007done:
+ addl 16(%esp),%ebx
+ xorl %edi,%esi
+ movl %ecx,%ebp
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 20(%esp),%eax
+ xorl %edx,%ebp
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %ebp,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 24(%esp),%edi
+ xorl %ecx,%esi
+ movl %eax,%ebp
+ roll $5,%eax
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ rorl $7,%ebx
+ addl %eax,%edi
+ addl 28(%esp),%edx
+ xorl %ebx,%ebp
+ movl %edi,%esi
+ roll $5,%edi
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%esp),%ecx
+ xorl %eax,%esi
+ movl %edx,%ebp
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ rorl $7,%edi
+ addl %edx,%ecx
+ addl 36(%esp),%ebx
+ xorl %edi,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 40(%esp),%eax
+ xorl %edx,%esi
+ movl %ebx,%ebp
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edx,%ebp
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 44(%esp),%edi
+ xorl %ecx,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%edi
+ addl 48(%esp),%edx
+ xorl %ebx,%esi
+ movl %edi,%ebp
+ roll $5,%edi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 52(%esp),%ecx
+ xorl %eax,%ebp
+ movl %edx,%esi
+ roll $5,%edx
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
+ addl %edx,%ecx
+ addl 56(%esp),%ebx
+ xorl %edi,%esi
+ movl %ecx,%ebp
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 60(%esp),%eax
+ xorl %edx,%ebp
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %ebp,%eax
+ rorl $7,%ecx
+ addl %ebx,%eax
+ movl 192(%esp),%ebp
+ addl (%ebp),%eax
+ movl 204(%esp),%esp
+ addl 4(%ebp),%esi
+ addl 8(%ebp),%ecx
+ movl %eax,(%ebp)
+ addl 12(%ebp),%edx
+ movl %esi,4(%ebp)
+ addl 16(%ebp),%edi
+ movl %ecx,8(%ebp)
+ movl %edx,12(%ebp)
+ movl %edi,16(%ebp)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3
+.align 64
+.LK_XX_XX:
+.long 1518500249,1518500249,1518500249,1518500249
+.long 1859775393,1859775393,1859775393,1859775393
+.long 2400959708,2400959708,2400959708,2400959708
+.long 3395469782,3395469782,3395469782,3395469782
+.long 66051,67438087,134810123,202182159
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
diff --git a/linux-x86/crypto/sha/sha256-586.S b/linux-x86/crypto/sha/sha256-586.S
index fe41afc..08d9484 100644
--- a/linux-x86/crypto/sha/sha256-586.S
+++ b/linux-x86/crypto/sha/sha256-586.S
@@ -27,6 +27,27 @@ sha256_block_data_order:
movl %edi,4(%esp)
movl %eax,8(%esp)
movl %ebx,12(%esp)
+ leal OPENSSL_ia32cap_P-.L001K256(%ebp),%edx
+ movl (%edx),%ecx
+ movl 4(%edx),%ebx
+ testl $1048576,%ecx
+ jnz .L002loop
+ movl 8(%edx),%edx
+ testl $16777216,%ecx
+ jz .L003no_xmm
+ andl $1073741824,%ecx
+ andl $268435968,%ebx
+ testl $536870912,%edx
+ jnz .L004shaext
+ orl %ebx,%ecx
+ andl $1342177280,%ecx
+ cmpl $1342177280,%ecx
+ testl $512,%ebx
+ jnz .L005SSSE3
+.L003no_xmm:
+ subl %edi,%eax
+ cmpl $256,%eax
+ jae .L006unrolled
jmp .L002loop
.align 16
.L002loop:
@@ -98,7 +119,7 @@ sha256_block_data_order:
movl %ecx,28(%esp)
movl %edi,32(%esp)
.align 16
-.L00300_15:
+.L00700_15:
movl %edx,%ecx
movl 24(%esp),%esi
rorl $14,%ecx
@@ -136,11 +157,11 @@ sha256_block_data_order:
addl $4,%ebp
addl %ebx,%eax
cmpl $3248222580,%esi
- jne .L00300_15
+ jne .L00700_15
movl 156(%esp),%ecx
- jmp .L00416_63
+ jmp .L00816_63
.align 16
-.L00416_63:
+.L00816_63:
movl %ecx,%ebx
movl 104(%esp),%esi
rorl $11,%ecx
@@ -195,7 +216,7 @@ sha256_block_data_order:
addl $4,%ebp
addl %ebx,%eax
cmpl $3329325298,%esi
- jne .L00416_63
+ jne .L00816_63
movl 356(%esp),%esi
movl 8(%esp),%ebx
movl 16(%esp),%ecx
@@ -229,207 +250,6 @@ sha256_block_data_order:
popl %ebx
popl %ebp
ret
-.align 32
-.L005loop_shrd:
- movl (%edi),%eax
- movl 4(%edi),%ebx
- movl 8(%edi),%ecx
- bswap %eax
- movl 12(%edi),%edx
- bswap %ebx
- pushl %eax
- bswap %ecx
- pushl %ebx
- bswap %edx
- pushl %ecx
- pushl %edx
- movl 16(%edi),%eax
- movl 20(%edi),%ebx
- movl 24(%edi),%ecx
- bswap %eax
- movl 28(%edi),%edx
- bswap %ebx
- pushl %eax
- bswap %ecx
- pushl %ebx
- bswap %edx
- pushl %ecx
- pushl %edx
- movl 32(%edi),%eax
- movl 36(%edi),%ebx
- movl 40(%edi),%ecx
- bswap %eax
- movl 44(%edi),%edx
- bswap %ebx
- pushl %eax
- bswap %ecx
- pushl %ebx
- bswap %edx
- pushl %ecx
- pushl %edx
- movl 48(%edi),%eax
- movl 52(%edi),%ebx
- movl 56(%edi),%ecx
- bswap %eax
- movl 60(%edi),%edx
- bswap %ebx
- pushl %eax
- bswap %ecx
- pushl %ebx
- bswap %edx
- pushl %ecx
- pushl %edx
- addl $64,%edi
- leal -36(%esp),%esp
- movl %edi,104(%esp)
- movl (%esi),%eax
- movl 4(%esi),%ebx
- movl 8(%esi),%ecx
- movl 12(%esi),%edi
- movl %ebx,8(%esp)
- xorl %ecx,%ebx
- movl %ecx,12(%esp)
- movl %edi,16(%esp)
- movl %ebx,(%esp)
- movl 16(%esi),%edx
- movl 20(%esi),%ebx
- movl 24(%esi),%ecx
- movl 28(%esi),%edi
- movl %ebx,24(%esp)
- movl %ecx,28(%esp)
- movl %edi,32(%esp)
-.align 16
-.L00600_15_shrd:
- movl %edx,%ecx
- movl 24(%esp),%esi
- shrdl $14,%ecx,%ecx
- movl 28(%esp),%edi
- xorl %edx,%ecx
- xorl %edi,%esi
- movl 96(%esp),%ebx
- shrdl $5,%ecx,%ecx
- andl %edx,%esi
- movl %edx,20(%esp)
- xorl %ecx,%edx
- addl 32(%esp),%ebx
- xorl %edi,%esi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %esi,%ebx
- shrdl $9,%ecx,%ecx
- addl %edx,%ebx
- movl 8(%esp),%edi
- xorl %eax,%ecx
- movl %eax,4(%esp)
- leal -4(%esp),%esp
- shrdl $11,%ecx,%ecx
- movl (%ebp),%esi
- xorl %eax,%ecx
- movl 20(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %esi,%ebx
- movl %eax,(%esp)
- addl %ebx,%edx
- andl 4(%esp),%eax
- addl %ecx,%ebx
- xorl %edi,%eax
- addl $4,%ebp
- addl %ebx,%eax
- cmpl $3248222580,%esi
- jne .L00600_15_shrd
- movl 156(%esp),%ecx
- jmp .L00716_63_shrd
-.align 16
-.L00716_63_shrd:
- movl %ecx,%ebx
- movl 104(%esp),%esi
- shrdl $11,%ecx,%ecx
- movl %esi,%edi
- shrdl $2,%esi,%esi
- xorl %ebx,%ecx
- shrl $3,%ebx
- shrdl $7,%ecx,%ecx
- xorl %edi,%esi
- xorl %ecx,%ebx
- shrdl $17,%esi,%esi
- addl 160(%esp),%ebx
- shrl $10,%edi
- addl 124(%esp),%ebx
- movl %edx,%ecx
- xorl %esi,%edi
- movl 24(%esp),%esi
- shrdl $14,%ecx,%ecx
- addl %edi,%ebx
- movl 28(%esp),%edi
- xorl %edx,%ecx
- xorl %edi,%esi
- movl %ebx,96(%esp)
- shrdl $5,%ecx,%ecx
- andl %edx,%esi
- movl %edx,20(%esp)
- xorl %ecx,%edx
- addl 32(%esp),%ebx
- xorl %edi,%esi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %esi,%ebx
- shrdl $9,%ecx,%ecx
- addl %edx,%ebx
- movl 8(%esp),%edi
- xorl %eax,%ecx
- movl %eax,4(%esp)
- leal -4(%esp),%esp
- shrdl $11,%ecx,%ecx
- movl (%ebp),%esi
- xorl %eax,%ecx
- movl 20(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %esi,%ebx
- movl %eax,(%esp)
- addl %ebx,%edx
- andl 4(%esp),%eax
- addl %ecx,%ebx
- xorl %edi,%eax
- movl 156(%esp),%ecx
- addl $4,%ebp
- addl %ebx,%eax
- cmpl $3329325298,%esi
- jne .L00716_63_shrd
- movl 356(%esp),%esi
- movl 8(%esp),%ebx
- movl 16(%esp),%ecx
- addl (%esi),%eax
- addl 4(%esi),%ebx
- addl 8(%esi),%edi
- addl 12(%esi),%ecx
- movl %eax,(%esi)
- movl %ebx,4(%esi)
- movl %edi,8(%esi)
- movl %ecx,12(%esi)
- movl 24(%esp),%eax
- movl 28(%esp),%ebx
- movl 32(%esp),%ecx
- movl 360(%esp),%edi
- addl 16(%esi),%edx
- addl 20(%esi),%eax
- addl 24(%esi),%ebx
- addl 28(%esi),%ecx
- movl %edx,16(%esi)
- movl %eax,20(%esi)
- movl %ebx,24(%esi)
- movl %ecx,28(%esi)
- leal 356(%esp),%esp
- subl $256,%ebp
- cmpl 8(%esp),%edi
- jb .L005loop_shrd
- movl 12(%esp),%esp
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
.align 64
.L001K256:
.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298
@@ -440,7 +260,7 @@ sha256_block_data_order:
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
.align 16
-.L008unrolled:
+.L006unrolled:
leal -96(%esp),%esp
movl (%esi),%eax
movl 4(%esi),%ebp
@@ -3346,5 +3166,1414 @@ sha256_block_data_order:
popl %ebx
popl %ebp
ret
+.align 32
+.L004shaext:
+ subl $32,%esp
+ movdqu (%esi),%xmm1
+ leal 128(%ebp),%ebp
+ movdqu 16(%esi),%xmm2
+ movdqa 128(%ebp),%xmm7
+ pshufd $27,%xmm1,%xmm0
+ pshufd $177,%xmm1,%xmm1
+ pshufd $27,%xmm2,%xmm2
+.byte 102,15,58,15,202,8
+ punpcklqdq %xmm0,%xmm2
+ jmp .L010loop_shaext
+.align 16
+.L010loop_shaext:
+ movdqu (%edi),%xmm3
+ movdqu 16(%edi),%xmm4
+ movdqu 32(%edi),%xmm5
+.byte 102,15,56,0,223
+ movdqu 48(%edi),%xmm6
+ movdqa %xmm2,16(%esp)
+ movdqa -128(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 102,15,56,0,231
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ nop
+ movdqa %xmm1,(%esp)
+.byte 15,56,203,202
+ movdqa -112(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 102,15,56,0,239
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ leal 64(%edi),%edi
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa -96(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 102,15,56,0,247
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa -80(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa -64(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa -48(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+ nop
+ paddd %xmm7,%xmm6
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa -32(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,205,245
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa -16(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa (%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa 16(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+ nop
+ paddd %xmm7,%xmm6
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa 32(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,205,245
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa 48(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa 64(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa 80(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+.byte 15,56,203,202
+ paddd %xmm7,%xmm6
+ movdqa 96(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+.byte 15,56,205,245
+ movdqa 128(%ebp),%xmm7
+.byte 15,56,203,202
+ movdqa 112(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+ nop
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ cmpl %edi,%eax
+ nop
+.byte 15,56,203,202
+ paddd 16(%esp),%xmm2
+ paddd (%esp),%xmm1
+ jnz .L010loop_shaext
+ pshufd $177,%xmm2,%xmm2
+ pshufd $27,%xmm1,%xmm7
+ pshufd $177,%xmm1,%xmm1
+ punpckhqdq %xmm2,%xmm1
+.byte 102,15,58,15,215,8
+ movl 44(%esp),%esp
+ movdqu %xmm1,(%esi)
+ movdqu %xmm2,16(%esi)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 32
+.L005SSSE3:
+ leal -96(%esp),%esp
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edi
+ movl %ebx,4(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,8(%esp)
+ movl %edi,12(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%edi
+ movl 24(%esi),%ecx
+ movl 28(%esi),%esi
+ movl %edi,20(%esp)
+ movl 100(%esp),%edi
+ movl %ecx,24(%esp)
+ movl %esi,28(%esp)
+ movdqa 256(%ebp),%xmm7
+ jmp .L011grand_ssse3
+.align 16
+.L011grand_ssse3:
+ movdqu (%edi),%xmm0
+ movdqu 16(%edi),%xmm1
+ movdqu 32(%edi),%xmm2
+ movdqu 48(%edi),%xmm3
+ addl $64,%edi
+.byte 102,15,56,0,199
+ movl %edi,100(%esp)
+.byte 102,15,56,0,207
+ movdqa (%ebp),%xmm4
+.byte 102,15,56,0,215
+ movdqa 16(%ebp),%xmm5
+ paddd %xmm0,%xmm4
+.byte 102,15,56,0,223
+ movdqa 32(%ebp),%xmm6
+ paddd %xmm1,%xmm5
+ movdqa 48(%ebp),%xmm7
+ movdqa %xmm4,32(%esp)
+ paddd %xmm2,%xmm6
+ movdqa %xmm5,48(%esp)
+ paddd %xmm3,%xmm7
+ movdqa %xmm6,64(%esp)
+ movdqa %xmm7,80(%esp)
+ jmp .L012ssse3_00_47
+.align 16
+.L012ssse3_00_47:
+ addl $64,%ebp
+ movl %edx,%ecx
+ movdqa %xmm1,%xmm4
+ rorl $14,%edx
+ movl 20(%esp),%esi
+ movdqa %xmm3,%xmm7
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+.byte 102,15,58,15,224,4
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+.byte 102,15,58,15,250,4
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ movdqa %xmm4,%xmm5
+ rorl $6,%edx
+ movl %eax,%ecx
+ movdqa %xmm4,%xmm6
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ psrld $3,%xmm4
+ movl %eax,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm0
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ psrld $7,%xmm6
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ pshufd $250,%xmm3,%xmm7
+ xorl %esi,%ecx
+ addl 32(%esp),%edx
+ pslld $14,%xmm5
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm4
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ psrld $11,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm5,%xmm4
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ pslld $11,%xmm5
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ pxor %xmm6,%xmm4
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ movdqa %xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ pxor %xmm5,%xmm4
+ movl %ebx,%ecx
+ addl %edi,%edx
+ psrld $10,%xmm7
+ movl (%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm4,%xmm0
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ psrlq $17,%xmm6
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ rorl $11,%ecx
+ pxor %xmm6,%xmm7
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ psrlq $2,%xmm6
+ addl 36(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ pshufd $128,%xmm7,%xmm7
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ psrldq $8,%xmm7
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ paddd %xmm7,%xmm0
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,24(%esp)
+ pshufd $80,%xmm0,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ movdqa %xmm7,%xmm6
+ rorl $11,%ecx
+ psrld $10,%xmm7
+ andl %eax,%ebx
+ psrlq $17,%xmm6
+ xorl %esi,%ecx
+ addl 40(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ psrlq $2,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm6,%xmm7
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ pshufd $8,%xmm7,%xmm7
+ xorl %edi,%esi
+ rorl $5,%edx
+ movdqa (%ebp),%xmm6
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ pslldq $8,%xmm7
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm0
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ paddd %xmm0,%xmm6
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 44(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movdqa %xmm6,32(%esp)
+ movl %edx,%ecx
+ movdqa %xmm2,%xmm4
+ rorl $14,%edx
+ movl 4(%esp),%esi
+ movdqa %xmm0,%xmm7
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+.byte 102,15,58,15,225,4
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+.byte 102,15,58,15,251,4
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ movdqa %xmm4,%xmm5
+ rorl $6,%edx
+ movl %eax,%ecx
+ movdqa %xmm4,%xmm6
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ psrld $3,%xmm4
+ movl %eax,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm1
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ psrld $7,%xmm6
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ pshufd $250,%xmm0,%xmm7
+ xorl %esi,%ecx
+ addl 48(%esp),%edx
+ pslld $14,%xmm5
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm4
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ psrld $11,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm5,%xmm4
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ pslld $11,%xmm5
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ pxor %xmm6,%xmm4
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ movdqa %xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ pxor %xmm5,%xmm4
+ movl %ebx,%ecx
+ addl %edi,%edx
+ psrld $10,%xmm7
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm4,%xmm1
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ psrlq $17,%xmm6
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ rorl $11,%ecx
+ pxor %xmm6,%xmm7
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ psrlq $2,%xmm6
+ addl 52(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ pshufd $128,%xmm7,%xmm7
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ psrldq $8,%xmm7
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ paddd %xmm7,%xmm1
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,8(%esp)
+ pshufd $80,%xmm1,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ movdqa %xmm7,%xmm6
+ rorl $11,%ecx
+ psrld $10,%xmm7
+ andl %eax,%ebx
+ psrlq $17,%xmm6
+ xorl %esi,%ecx
+ addl 56(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ psrlq $2,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm6,%xmm7
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ pshufd $8,%xmm7,%xmm7
+ xorl %edi,%esi
+ rorl $5,%edx
+ movdqa 16(%ebp),%xmm6
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ pslldq $8,%xmm7
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm1
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ paddd %xmm1,%xmm6
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 60(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movdqa %xmm6,48(%esp)
+ movl %edx,%ecx
+ movdqa %xmm3,%xmm4
+ rorl $14,%edx
+ movl 20(%esp),%esi
+ movdqa %xmm1,%xmm7
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+.byte 102,15,58,15,226,4
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+.byte 102,15,58,15,248,4
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ movdqa %xmm4,%xmm5
+ rorl $6,%edx
+ movl %eax,%ecx
+ movdqa %xmm4,%xmm6
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ psrld $3,%xmm4
+ movl %eax,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm2
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ psrld $7,%xmm6
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ pshufd $250,%xmm1,%xmm7
+ xorl %esi,%ecx
+ addl 64(%esp),%edx
+ pslld $14,%xmm5
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm4
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ psrld $11,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm5,%xmm4
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ pslld $11,%xmm5
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ pxor %xmm6,%xmm4
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ movdqa %xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ pxor %xmm5,%xmm4
+ movl %ebx,%ecx
+ addl %edi,%edx
+ psrld $10,%xmm7
+ movl (%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm4,%xmm2
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ psrlq $17,%xmm6
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ rorl $11,%ecx
+ pxor %xmm6,%xmm7
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ psrlq $2,%xmm6
+ addl 68(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ pshufd $128,%xmm7,%xmm7
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ psrldq $8,%xmm7
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ paddd %xmm7,%xmm2
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,24(%esp)
+ pshufd $80,%xmm2,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ movdqa %xmm7,%xmm6
+ rorl $11,%ecx
+ psrld $10,%xmm7
+ andl %eax,%ebx
+ psrlq $17,%xmm6
+ xorl %esi,%ecx
+ addl 72(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ psrlq $2,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm6,%xmm7
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ pshufd $8,%xmm7,%xmm7
+ xorl %edi,%esi
+ rorl $5,%edx
+ movdqa 32(%ebp),%xmm6
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ pslldq $8,%xmm7
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm2
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ paddd %xmm2,%xmm6
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 76(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movdqa %xmm6,64(%esp)
+ movl %edx,%ecx
+ movdqa %xmm0,%xmm4
+ rorl $14,%edx
+ movl 4(%esp),%esi
+ movdqa %xmm2,%xmm7
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+.byte 102,15,58,15,227,4
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+.byte 102,15,58,15,249,4
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ movdqa %xmm4,%xmm5
+ rorl $6,%edx
+ movl %eax,%ecx
+ movdqa %xmm4,%xmm6
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ psrld $3,%xmm4
+ movl %eax,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm3
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ psrld $7,%xmm6
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ pshufd $250,%xmm2,%xmm7
+ xorl %esi,%ecx
+ addl 80(%esp),%edx
+ pslld $14,%xmm5
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm4
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ psrld $11,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm5,%xmm4
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ pslld $11,%xmm5
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ pxor %xmm6,%xmm4
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ movdqa %xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ pxor %xmm5,%xmm4
+ movl %ebx,%ecx
+ addl %edi,%edx
+ psrld $10,%xmm7
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm4,%xmm3
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ psrlq $17,%xmm6
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ rorl $11,%ecx
+ pxor %xmm6,%xmm7
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ psrlq $2,%xmm6
+ addl 84(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ pshufd $128,%xmm7,%xmm7
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ psrldq $8,%xmm7
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ paddd %xmm7,%xmm3
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,8(%esp)
+ pshufd $80,%xmm3,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ movdqa %xmm7,%xmm6
+ rorl $11,%ecx
+ psrld $10,%xmm7
+ andl %eax,%ebx
+ psrlq $17,%xmm6
+ xorl %esi,%ecx
+ addl 88(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ psrlq $2,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm6,%xmm7
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ pshufd $8,%xmm7,%xmm7
+ xorl %edi,%esi
+ rorl $5,%edx
+ movdqa 48(%ebp),%xmm6
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ pslldq $8,%xmm7
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm3
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ paddd %xmm3,%xmm6
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 92(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movdqa %xmm6,80(%esp)
+ cmpl $66051,64(%ebp)
+ jne .L012ssse3_00_47
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 20(%esp),%esi
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 32(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl (%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 36(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,24(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 40(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 44(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 4(%esp),%esi
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 48(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 52(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,8(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 56(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 60(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 20(%esp),%esi
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 64(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl (%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 68(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,24(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 72(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 76(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 4(%esp),%esi
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 80(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 84(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,8(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 88(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 92(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movl 96(%esp),%esi
+ xorl %edi,%ebx
+ movl 12(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl %ebx,4(%esp)
+ xorl %edi,%ebx
+ movl %edi,8(%esp)
+ movl %ecx,12(%esp)
+ movl 20(%esp),%edi
+ movl 24(%esp),%ecx
+ addl 16(%esi),%edx
+ addl 20(%esi),%edi
+ addl 24(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %edi,20(%esi)
+ movl %edi,20(%esp)
+ movl 28(%esp),%edi
+ movl %ecx,24(%esi)
+ addl 28(%esi),%edi
+ movl %ecx,24(%esp)
+ movl %edi,28(%esi)
+ movl %edi,28(%esp)
+ movl 100(%esp),%edi
+ movdqa 64(%ebp),%xmm7
+ subl $192,%ebp
+ cmpl 104(%esp),%edi
+ jb .L011grand_ssse3
+ movl 108(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
#endif
diff --git a/linux-x86/crypto/sha/sha512-586.S b/linux-x86/crypto/sha/sha512-586.S
index 17f7945..a928400 100644
--- a/linux-x86/crypto/sha/sha512-586.S
+++ b/linux-x86/crypto/sha/sha512-586.S
@@ -27,6 +27,2269 @@ sha512_block_data_order:
movl %edi,4(%esp)
movl %eax,8(%esp)
movl %ebx,12(%esp)
+ leal OPENSSL_ia32cap_P-.L001K512(%ebp),%edx
+ movl (%edx),%ecx
+ testl $67108864,%ecx
+ jz .L002loop_x86
+ movl 4(%edx),%edx
+ movq (%esi),%mm0
+ andl $16777216,%ecx
+ movq 8(%esi),%mm1
+ andl $512,%edx
+ movq 16(%esi),%mm2
+ orl %edx,%ecx
+ movq 24(%esi),%mm3
+ movq 32(%esi),%mm4
+ movq 40(%esi),%mm5
+ movq 48(%esi),%mm6
+ movq 56(%esi),%mm7
+ cmpl $16777728,%ecx
+ je .L003SSSE3
+ subl $80,%esp
+ jmp .L004loop_sse2
+.align 16
+.L004loop_sse2:
+ movq %mm1,8(%esp)
+ movq %mm2,16(%esp)
+ movq %mm3,24(%esp)
+ movq %mm5,40(%esp)
+ movq %mm6,48(%esp)
+ pxor %mm1,%mm2
+ movq %mm7,56(%esp)
+ movq %mm0,%mm3
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ addl $8,%edi
+ movl $15,%edx
+ bswap %eax
+ bswap %ebx
+ jmp .L00500_14_sse2
+.align 16
+.L00500_14_sse2:
+ movd %eax,%mm1
+ movl (%edi),%eax
+ movd %ebx,%mm7
+ movl 4(%edi),%ebx
+ addl $8,%edi
+ bswap %eax
+ bswap %ebx
+ punpckldq %mm1,%mm7
+ movq %mm4,%mm1
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ movq %mm3,%mm0
+ movq %mm7,72(%esp)
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ paddq (%ebp),%mm7
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ subl $8,%esp
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 40(%esp),%mm5
+ paddq %mm2,%mm3
+ movq %mm0,%mm2
+ addl $8,%ebp
+ paddq %mm6,%mm3
+ movq 48(%esp),%mm6
+ decl %edx
+ jnz .L00500_14_sse2
+ movd %eax,%mm1
+ movd %ebx,%mm7
+ punpckldq %mm1,%mm7
+ movq %mm4,%mm1
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ movq %mm3,%mm0
+ movq %mm7,72(%esp)
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ paddq (%ebp),%mm7
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ subl $8,%esp
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 192(%esp),%mm7
+ paddq %mm2,%mm3
+ movq %mm0,%mm2
+ addl $8,%ebp
+ paddq %mm6,%mm3
+ pxor %mm0,%mm0
+ movl $32,%edx
+ jmp .L00616_79_sse2
+.align 16
+.L00616_79_sse2:
+ movq 88(%esp),%mm5
+ movq %mm7,%mm1
+ psrlq $1,%mm7
+ movq %mm5,%mm6
+ psrlq $6,%mm5
+ psllq $56,%mm1
+ paddq %mm3,%mm0
+ movq %mm7,%mm3
+ psrlq $6,%mm7
+ pxor %mm1,%mm3
+ psllq $7,%mm1
+ pxor %mm7,%mm3
+ psrlq $1,%mm7
+ pxor %mm1,%mm3
+ movq %mm5,%mm1
+ psrlq $13,%mm5
+ pxor %mm3,%mm7
+ psllq $3,%mm6
+ pxor %mm5,%mm1
+ paddq 200(%esp),%mm7
+ pxor %mm6,%mm1
+ psrlq $42,%mm5
+ paddq 128(%esp),%mm7
+ pxor %mm5,%mm1
+ psllq $42,%mm6
+ movq 40(%esp),%mm5
+ pxor %mm6,%mm1
+ movq 48(%esp),%mm6
+ paddq %mm1,%mm7
+ movq %mm4,%mm1
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ movq %mm7,72(%esp)
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ paddq (%ebp),%mm7
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ subl $8,%esp
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 192(%esp),%mm7
+ paddq %mm6,%mm2
+ addl $8,%ebp
+ movq 88(%esp),%mm5
+ movq %mm7,%mm1
+ psrlq $1,%mm7
+ movq %mm5,%mm6
+ psrlq $6,%mm5
+ psllq $56,%mm1
+ paddq %mm3,%mm2
+ movq %mm7,%mm3
+ psrlq $6,%mm7
+ pxor %mm1,%mm3
+ psllq $7,%mm1
+ pxor %mm7,%mm3
+ psrlq $1,%mm7
+ pxor %mm1,%mm3
+ movq %mm5,%mm1
+ psrlq $13,%mm5
+ pxor %mm3,%mm7
+ psllq $3,%mm6
+ pxor %mm5,%mm1
+ paddq 200(%esp),%mm7
+ pxor %mm6,%mm1
+ psrlq $42,%mm5
+ paddq 128(%esp),%mm7
+ pxor %mm5,%mm1
+ psllq $42,%mm6
+ movq 40(%esp),%mm5
+ pxor %mm6,%mm1
+ movq 48(%esp),%mm6
+ paddq %mm1,%mm7
+ movq %mm4,%mm1
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ movq %mm7,72(%esp)
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ paddq (%ebp),%mm7
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ subl $8,%esp
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 192(%esp),%mm7
+ paddq %mm6,%mm0
+ addl $8,%ebp
+ decl %edx
+ jnz .L00616_79_sse2
+ paddq %mm3,%mm0
+ movq 8(%esp),%mm1
+ movq 24(%esp),%mm3
+ movq 40(%esp),%mm5
+ movq 48(%esp),%mm6
+ movq 56(%esp),%mm7
+ pxor %mm1,%mm2
+ paddq (%esi),%mm0
+ paddq 8(%esi),%mm1
+ paddq 16(%esi),%mm2
+ paddq 24(%esi),%mm3
+ paddq 32(%esi),%mm4
+ paddq 40(%esi),%mm5
+ paddq 48(%esi),%mm6
+ paddq 56(%esi),%mm7
+ movl $640,%eax
+ movq %mm0,(%esi)
+ movq %mm1,8(%esi)
+ movq %mm2,16(%esi)
+ movq %mm3,24(%esi)
+ movq %mm4,32(%esi)
+ movq %mm5,40(%esi)
+ movq %mm6,48(%esi)
+ movq %mm7,56(%esi)
+ leal (%esp,%eax,1),%esp
+ subl %eax,%ebp
+ cmpl 88(%esp),%edi
+ jb .L004loop_sse2
+ movl 92(%esp),%esp
+ emms
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 32
+.L003SSSE3:
+ leal -64(%esp),%edx
+ subl $256,%esp
+ movdqa 640(%ebp),%xmm1
+ movdqu (%edi),%xmm0
+.byte 102,15,56,0,193
+ movdqa (%ebp),%xmm3
+ movdqa %xmm1,%xmm2
+ movdqu 16(%edi),%xmm1
+ paddq %xmm0,%xmm3
+.byte 102,15,56,0,202
+ movdqa %xmm3,-128(%edx)
+ movdqa 16(%ebp),%xmm4
+ movdqa %xmm2,%xmm3
+ movdqu 32(%edi),%xmm2
+ paddq %xmm1,%xmm4
+.byte 102,15,56,0,211
+ movdqa %xmm4,-112(%edx)
+ movdqa 32(%ebp),%xmm5
+ movdqa %xmm3,%xmm4
+ movdqu 48(%edi),%xmm3
+ paddq %xmm2,%xmm5
+.byte 102,15,56,0,220
+ movdqa %xmm5,-96(%edx)
+ movdqa 48(%ebp),%xmm6
+ movdqa %xmm4,%xmm5
+ movdqu 64(%edi),%xmm4
+ paddq %xmm3,%xmm6
+.byte 102,15,56,0,229
+ movdqa %xmm6,-80(%edx)
+ movdqa 64(%ebp),%xmm7
+ movdqa %xmm5,%xmm6
+ movdqu 80(%edi),%xmm5
+ paddq %xmm4,%xmm7
+.byte 102,15,56,0,238
+ movdqa %xmm7,-64(%edx)
+ movdqa %xmm0,(%edx)
+ movdqa 80(%ebp),%xmm0
+ movdqa %xmm6,%xmm7
+ movdqu 96(%edi),%xmm6
+ paddq %xmm5,%xmm0
+.byte 102,15,56,0,247
+ movdqa %xmm0,-48(%edx)
+ movdqa %xmm1,16(%edx)
+ movdqa 96(%ebp),%xmm1
+ movdqa %xmm7,%xmm0
+ movdqu 112(%edi),%xmm7
+ paddq %xmm6,%xmm1
+.byte 102,15,56,0,248
+ movdqa %xmm1,-32(%edx)
+ movdqa %xmm2,32(%edx)
+ movdqa 112(%ebp),%xmm2
+ movdqa (%edx),%xmm0
+ paddq %xmm7,%xmm2
+ movdqa %xmm2,-16(%edx)
+ nop
+.align 32
+.L007loop_ssse3:
+ movdqa 16(%edx),%xmm2
+ movdqa %xmm3,48(%edx)
+ leal 128(%ebp),%ebp
+ movq %mm1,8(%esp)
+ movl %edi,%ebx
+ movq %mm2,16(%esp)
+ leal 128(%edi),%edi
+ movq %mm3,24(%esp)
+ cmpl %eax,%edi
+ movq %mm5,40(%esp)
+ cmovbl %edi,%ebx
+ movq %mm6,48(%esp)
+ movl $4,%ecx
+ pxor %mm1,%mm2
+ movq %mm7,56(%esp)
+ pxor %mm3,%mm3
+ jmp .L00800_47_ssse3
+.align 32
+.L00800_47_ssse3:
+ movdqa %xmm5,%xmm3
+ movdqa %xmm2,%xmm1
+.byte 102,15,58,15,208,8
+ movdqa %xmm4,(%edx)
+.byte 102,15,58,15,220,8
+ movdqa %xmm2,%xmm4
+ psrlq $7,%xmm2
+ paddq %xmm3,%xmm0
+ movdqa %xmm4,%xmm3
+ psrlq $1,%xmm4
+ psllq $56,%xmm3
+ pxor %xmm4,%xmm2
+ psrlq $7,%xmm4
+ pxor %xmm3,%xmm2
+ psllq $7,%xmm3
+ pxor %xmm4,%xmm2
+ movdqa %xmm7,%xmm4
+ pxor %xmm3,%xmm2
+ movdqa %xmm7,%xmm3
+ psrlq $6,%xmm4
+ paddq %xmm2,%xmm0
+ movdqa %xmm7,%xmm2
+ psrlq $19,%xmm3
+ psllq $3,%xmm2
+ pxor %xmm3,%xmm4
+ psrlq $42,%xmm3
+ pxor %xmm2,%xmm4
+ psllq $42,%xmm2
+ pxor %xmm3,%xmm4
+ movdqa 32(%edx),%xmm3
+ pxor %xmm2,%xmm4
+ movdqa (%ebp),%xmm2
+ movq %mm4,%mm1
+ paddq %xmm4,%xmm0
+ movq -128(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ paddq %xmm0,%xmm2
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 32(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 40(%esp),%mm6
+ movq %mm4,%mm1
+ movq -120(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,24(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,56(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 48(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 16(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq (%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 24(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 32(%esp),%mm6
+ movdqa %xmm2,-128(%edx)
+ movdqa %xmm6,%xmm4
+ movdqa %xmm3,%xmm2
+.byte 102,15,58,15,217,8
+ movdqa %xmm5,16(%edx)
+.byte 102,15,58,15,229,8
+ movdqa %xmm3,%xmm5
+ psrlq $7,%xmm3
+ paddq %xmm4,%xmm1
+ movdqa %xmm5,%xmm4
+ psrlq $1,%xmm5
+ psllq $56,%xmm4
+ pxor %xmm5,%xmm3
+ psrlq $7,%xmm5
+ pxor %xmm4,%xmm3
+ psllq $7,%xmm4
+ pxor %xmm5,%xmm3
+ movdqa %xmm0,%xmm5
+ pxor %xmm4,%xmm3
+ movdqa %xmm0,%xmm4
+ psrlq $6,%xmm5
+ paddq %xmm3,%xmm1
+ movdqa %xmm0,%xmm3
+ psrlq $19,%xmm4
+ psllq $3,%xmm3
+ pxor %xmm4,%xmm5
+ psrlq $42,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $42,%xmm3
+ pxor %xmm4,%xmm5
+ movdqa 48(%edx),%xmm4
+ pxor %xmm3,%xmm5
+ movdqa 16(%ebp),%xmm3
+ movq %mm4,%mm1
+ paddq %xmm5,%xmm1
+ movq -112(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,16(%esp)
+ paddq %xmm1,%xmm3
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,48(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 40(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 8(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 56(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 16(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 24(%esp),%mm6
+ movq %mm4,%mm1
+ movq -104(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,8(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,40(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 32(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq (%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 48(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 8(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 16(%esp),%mm6
+ movdqa %xmm3,-112(%edx)
+ movdqa %xmm7,%xmm5
+ movdqa %xmm4,%xmm3
+.byte 102,15,58,15,226,8
+ movdqa %xmm6,32(%edx)
+.byte 102,15,58,15,238,8
+ movdqa %xmm4,%xmm6
+ psrlq $7,%xmm4
+ paddq %xmm5,%xmm2
+ movdqa %xmm6,%xmm5
+ psrlq $1,%xmm6
+ psllq $56,%xmm5
+ pxor %xmm6,%xmm4
+ psrlq $7,%xmm6
+ pxor %xmm5,%xmm4
+ psllq $7,%xmm5
+ pxor %xmm6,%xmm4
+ movdqa %xmm1,%xmm6
+ pxor %xmm5,%xmm4
+ movdqa %xmm1,%xmm5
+ psrlq $6,%xmm6
+ paddq %xmm4,%xmm2
+ movdqa %xmm1,%xmm4
+ psrlq $19,%xmm5
+ psllq $3,%xmm4
+ pxor %xmm5,%xmm6
+ psrlq $42,%xmm5
+ pxor %xmm4,%xmm6
+ psllq $42,%xmm4
+ pxor %xmm5,%xmm6
+ movdqa (%edx),%xmm5
+ pxor %xmm4,%xmm6
+ movdqa 32(%ebp),%xmm4
+ movq %mm4,%mm1
+ paddq %xmm6,%xmm2
+ movq -96(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,(%esp)
+ paddq %xmm2,%xmm4
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,32(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 24(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 56(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 40(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq (%esp),%mm5
+ paddq %mm6,%mm2
+ movq 8(%esp),%mm6
+ movq %mm4,%mm1
+ movq -88(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,56(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,24(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 16(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 48(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 32(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 56(%esp),%mm5
+ paddq %mm6,%mm0
+ movq (%esp),%mm6
+ movdqa %xmm4,-96(%edx)
+ movdqa %xmm0,%xmm6
+ movdqa %xmm5,%xmm4
+.byte 102,15,58,15,235,8
+ movdqa %xmm7,48(%edx)
+.byte 102,15,58,15,247,8
+ movdqa %xmm5,%xmm7
+ psrlq $7,%xmm5
+ paddq %xmm6,%xmm3
+ movdqa %xmm7,%xmm6
+ psrlq $1,%xmm7
+ psllq $56,%xmm6
+ pxor %xmm7,%xmm5
+ psrlq $7,%xmm7
+ pxor %xmm6,%xmm5
+ psllq $7,%xmm6
+ pxor %xmm7,%xmm5
+ movdqa %xmm2,%xmm7
+ pxor %xmm6,%xmm5
+ movdqa %xmm2,%xmm6
+ psrlq $6,%xmm7
+ paddq %xmm5,%xmm3
+ movdqa %xmm2,%xmm5
+ psrlq $19,%xmm6
+ psllq $3,%xmm5
+ pxor %xmm6,%xmm7
+ psrlq $42,%xmm6
+ pxor %xmm5,%xmm7
+ psllq $42,%xmm5
+ pxor %xmm6,%xmm7
+ movdqa 16(%edx),%xmm6
+ pxor %xmm5,%xmm7
+ movdqa 48(%ebp),%xmm5
+ movq %mm4,%mm1
+ paddq %xmm7,%xmm3
+ movq -80(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,48(%esp)
+ paddq %xmm3,%xmm5
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,16(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 8(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 40(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 24(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 48(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 56(%esp),%mm6
+ movq %mm4,%mm1
+ movq -72(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,40(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,8(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq (%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 32(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 16(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 40(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 48(%esp),%mm6
+ movdqa %xmm5,-80(%edx)
+ movdqa %xmm1,%xmm7
+ movdqa %xmm6,%xmm5
+.byte 102,15,58,15,244,8
+ movdqa %xmm0,(%edx)
+.byte 102,15,58,15,248,8
+ movdqa %xmm6,%xmm0
+ psrlq $7,%xmm6
+ paddq %xmm7,%xmm4
+ movdqa %xmm0,%xmm7
+ psrlq $1,%xmm0
+ psllq $56,%xmm7
+ pxor %xmm0,%xmm6
+ psrlq $7,%xmm0
+ pxor %xmm7,%xmm6
+ psllq $7,%xmm7
+ pxor %xmm0,%xmm6
+ movdqa %xmm3,%xmm0
+ pxor %xmm7,%xmm6
+ movdqa %xmm3,%xmm7
+ psrlq $6,%xmm0
+ paddq %xmm6,%xmm4
+ movdqa %xmm3,%xmm6
+ psrlq $19,%xmm7
+ psllq $3,%xmm6
+ pxor %xmm7,%xmm0
+ psrlq $42,%xmm7
+ pxor %xmm6,%xmm0
+ psllq $42,%xmm6
+ pxor %xmm7,%xmm0
+ movdqa 32(%edx),%xmm7
+ pxor %xmm6,%xmm0
+ movdqa 64(%ebp),%xmm6
+ movq %mm4,%mm1
+ paddq %xmm0,%xmm4
+ movq -64(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ paddq %xmm4,%xmm6
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 32(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 40(%esp),%mm6
+ movq %mm4,%mm1
+ movq -56(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,24(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,56(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 48(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 16(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq (%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 24(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 32(%esp),%mm6
+ movdqa %xmm6,-64(%edx)
+ movdqa %xmm2,%xmm0
+ movdqa %xmm7,%xmm6
+.byte 102,15,58,15,253,8
+ movdqa %xmm1,16(%edx)
+.byte 102,15,58,15,193,8
+ movdqa %xmm7,%xmm1
+ psrlq $7,%xmm7
+ paddq %xmm0,%xmm5
+ movdqa %xmm1,%xmm0
+ psrlq $1,%xmm1
+ psllq $56,%xmm0
+ pxor %xmm1,%xmm7
+ psrlq $7,%xmm1
+ pxor %xmm0,%xmm7
+ psllq $7,%xmm0
+ pxor %xmm1,%xmm7
+ movdqa %xmm4,%xmm1
+ pxor %xmm0,%xmm7
+ movdqa %xmm4,%xmm0
+ psrlq $6,%xmm1
+ paddq %xmm7,%xmm5
+ movdqa %xmm4,%xmm7
+ psrlq $19,%xmm0
+ psllq $3,%xmm7
+ pxor %xmm0,%xmm1
+ psrlq $42,%xmm0
+ pxor %xmm7,%xmm1
+ psllq $42,%xmm7
+ pxor %xmm0,%xmm1
+ movdqa 48(%edx),%xmm0
+ pxor %xmm7,%xmm1
+ movdqa 80(%ebp),%xmm7
+ movq %mm4,%mm1
+ paddq %xmm1,%xmm5
+ movq -48(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,16(%esp)
+ paddq %xmm5,%xmm7
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,48(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 40(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 8(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 56(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 16(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 24(%esp),%mm6
+ movq %mm4,%mm1
+ movq -40(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,8(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,40(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 32(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq (%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 48(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 8(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 16(%esp),%mm6
+ movdqa %xmm7,-48(%edx)
+ movdqa %xmm3,%xmm1
+ movdqa %xmm0,%xmm7
+.byte 102,15,58,15,198,8
+ movdqa %xmm2,32(%edx)
+.byte 102,15,58,15,202,8
+ movdqa %xmm0,%xmm2
+ psrlq $7,%xmm0
+ paddq %xmm1,%xmm6
+ movdqa %xmm2,%xmm1
+ psrlq $1,%xmm2
+ psllq $56,%xmm1
+ pxor %xmm2,%xmm0
+ psrlq $7,%xmm2
+ pxor %xmm1,%xmm0
+ psllq $7,%xmm1
+ pxor %xmm2,%xmm0
+ movdqa %xmm5,%xmm2
+ pxor %xmm1,%xmm0
+ movdqa %xmm5,%xmm1
+ psrlq $6,%xmm2
+ paddq %xmm0,%xmm6
+ movdqa %xmm5,%xmm0
+ psrlq $19,%xmm1
+ psllq $3,%xmm0
+ pxor %xmm1,%xmm2
+ psrlq $42,%xmm1
+ pxor %xmm0,%xmm2
+ psllq $42,%xmm0
+ pxor %xmm1,%xmm2
+ movdqa (%edx),%xmm1
+ pxor %xmm0,%xmm2
+ movdqa 96(%ebp),%xmm0
+ movq %mm4,%mm1
+ paddq %xmm2,%xmm6
+ movq -32(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,(%esp)
+ paddq %xmm6,%xmm0
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,32(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 24(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 56(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 40(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq (%esp),%mm5
+ paddq %mm6,%mm2
+ movq 8(%esp),%mm6
+ movq %mm4,%mm1
+ movq -24(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,56(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,24(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 16(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 48(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 32(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 56(%esp),%mm5
+ paddq %mm6,%mm0
+ movq (%esp),%mm6
+ movdqa %xmm0,-32(%edx)
+ movdqa %xmm4,%xmm2
+ movdqa %xmm1,%xmm0
+.byte 102,15,58,15,207,8
+ movdqa %xmm3,48(%edx)
+.byte 102,15,58,15,211,8
+ movdqa %xmm1,%xmm3
+ psrlq $7,%xmm1
+ paddq %xmm2,%xmm7
+ movdqa %xmm3,%xmm2
+ psrlq $1,%xmm3
+ psllq $56,%xmm2
+ pxor %xmm3,%xmm1
+ psrlq $7,%xmm3
+ pxor %xmm2,%xmm1
+ psllq $7,%xmm2
+ pxor %xmm3,%xmm1
+ movdqa %xmm6,%xmm3
+ pxor %xmm2,%xmm1
+ movdqa %xmm6,%xmm2
+ psrlq $6,%xmm3
+ paddq %xmm1,%xmm7
+ movdqa %xmm6,%xmm1
+ psrlq $19,%xmm2
+ psllq $3,%xmm1
+ pxor %xmm2,%xmm3
+ psrlq $42,%xmm2
+ pxor %xmm1,%xmm3
+ psllq $42,%xmm1
+ pxor %xmm2,%xmm3
+ movdqa 16(%edx),%xmm2
+ pxor %xmm1,%xmm3
+ movdqa 112(%ebp),%xmm1
+ movq %mm4,%mm1
+ paddq %xmm3,%xmm7
+ movq -16(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,48(%esp)
+ paddq %xmm7,%xmm1
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,16(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 8(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 40(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 24(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 48(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 56(%esp),%mm6
+ movq %mm4,%mm1
+ movq -8(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,40(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,8(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq (%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 32(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 16(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 40(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 48(%esp),%mm6
+ movdqa %xmm1,-16(%edx)
+ leal 128(%ebp),%ebp
+ decl %ecx
+ jnz .L00800_47_ssse3
+ movdqa (%ebp),%xmm1
+ leal -640(%ebp),%ebp
+ movdqu (%ebx),%xmm0
+.byte 102,15,56,0,193
+ movdqa (%ebp),%xmm3
+ movdqa %xmm1,%xmm2
+ movdqu 16(%ebx),%xmm1
+ paddq %xmm0,%xmm3
+.byte 102,15,56,0,202
+ movq %mm4,%mm1
+ movq -128(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 32(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 40(%esp),%mm6
+ movq %mm4,%mm1
+ movq -120(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,24(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,56(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 48(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 16(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq (%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 24(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 32(%esp),%mm6
+ movdqa %xmm3,-128(%edx)
+ movdqa 16(%ebp),%xmm4
+ movdqa %xmm2,%xmm3
+ movdqu 32(%ebx),%xmm2
+ paddq %xmm1,%xmm4
+.byte 102,15,56,0,211
+ movq %mm4,%mm1
+ movq -112(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,16(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,48(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 40(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 8(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 56(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 16(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 24(%esp),%mm6
+ movq %mm4,%mm1
+ movq -104(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,8(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,40(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 32(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq (%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 48(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 8(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 16(%esp),%mm6
+ movdqa %xmm4,-112(%edx)
+ movdqa 32(%ebp),%xmm5
+ movdqa %xmm3,%xmm4
+ movdqu 48(%ebx),%xmm3
+ paddq %xmm2,%xmm5
+.byte 102,15,56,0,220
+ movq %mm4,%mm1
+ movq -96(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,32(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 24(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 56(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 40(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq (%esp),%mm5
+ paddq %mm6,%mm2
+ movq 8(%esp),%mm6
+ movq %mm4,%mm1
+ movq -88(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,56(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,24(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 16(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 48(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 32(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 56(%esp),%mm5
+ paddq %mm6,%mm0
+ movq (%esp),%mm6
+ movdqa %xmm5,-96(%edx)
+ movdqa 48(%ebp),%xmm6
+ movdqa %xmm4,%xmm5
+ movdqu 64(%ebx),%xmm4
+ paddq %xmm3,%xmm6
+.byte 102,15,56,0,229
+ movq %mm4,%mm1
+ movq -80(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,48(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,16(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 8(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 40(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 24(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 48(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 56(%esp),%mm6
+ movq %mm4,%mm1
+ movq -72(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,40(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,8(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq (%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 32(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 16(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 40(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 48(%esp),%mm6
+ movdqa %xmm6,-80(%edx)
+ movdqa 64(%ebp),%xmm7
+ movdqa %xmm5,%xmm6
+ movdqu 80(%ebx),%xmm5
+ paddq %xmm4,%xmm7
+.byte 102,15,56,0,238
+ movq %mm4,%mm1
+ movq -64(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 32(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 40(%esp),%mm6
+ movq %mm4,%mm1
+ movq -56(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,24(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,56(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 48(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 16(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq (%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 24(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 32(%esp),%mm6
+ movdqa %xmm7,-64(%edx)
+ movdqa %xmm0,(%edx)
+ movdqa 80(%ebp),%xmm0
+ movdqa %xmm6,%xmm7
+ movdqu 96(%ebx),%xmm6
+ paddq %xmm5,%xmm0
+.byte 102,15,56,0,247
+ movq %mm4,%mm1
+ movq -48(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,16(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,48(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 40(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 8(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 56(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 16(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 24(%esp),%mm6
+ movq %mm4,%mm1
+ movq -40(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,8(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,40(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 32(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq (%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 48(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 8(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 16(%esp),%mm6
+ movdqa %xmm0,-48(%edx)
+ movdqa %xmm1,16(%edx)
+ movdqa 96(%ebp),%xmm1
+ movdqa %xmm7,%xmm0
+ movdqu 112(%ebx),%xmm7
+ paddq %xmm6,%xmm1
+.byte 102,15,56,0,248
+ movq %mm4,%mm1
+ movq -32(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,32(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 24(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 56(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 40(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq (%esp),%mm5
+ paddq %mm6,%mm2
+ movq 8(%esp),%mm6
+ movq %mm4,%mm1
+ movq -24(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,56(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,24(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 16(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 48(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 32(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 56(%esp),%mm5
+ paddq %mm6,%mm0
+ movq (%esp),%mm6
+ movdqa %xmm1,-32(%edx)
+ movdqa %xmm2,32(%edx)
+ movdqa 112(%ebp),%xmm2
+ movdqa (%edx),%xmm0
+ paddq %xmm7,%xmm2
+ movq %mm4,%mm1
+ movq -16(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,48(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,16(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 8(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 40(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 24(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 48(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 56(%esp),%mm6
+ movq %mm4,%mm1
+ movq -8(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,40(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,8(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq (%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 32(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 16(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 40(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 48(%esp),%mm6
+ movdqa %xmm2,-16(%edx)
+ movq 8(%esp),%mm1
+ paddq %mm3,%mm0
+ movq 24(%esp),%mm3
+ movq 56(%esp),%mm7
+ pxor %mm1,%mm2
+ paddq (%esi),%mm0
+ paddq 8(%esi),%mm1
+ paddq 16(%esi),%mm2
+ paddq 24(%esi),%mm3
+ paddq 32(%esi),%mm4
+ paddq 40(%esi),%mm5
+ paddq 48(%esi),%mm6
+ paddq 56(%esi),%mm7
+ movq %mm0,(%esi)
+ movq %mm1,8(%esi)
+ movq %mm2,16(%esi)
+ movq %mm3,24(%esi)
+ movq %mm4,32(%esi)
+ movq %mm5,40(%esi)
+ movq %mm6,48(%esi)
+ movq %mm7,56(%esi)
+ cmpl %eax,%edi
+ jb .L007loop_ssse3
+ movl 76(%edx),%esp
+ emms
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
.align 16
.L002loop_x86:
movl (%edi),%eax
@@ -132,7 +2395,7 @@ sha512_block_data_order:
movl $16,%ecx
.long 2784229001
.align 16
-.L00300_15_x86:
+.L00900_15_x86:
movl 40(%esp),%ecx
movl 44(%esp),%edx
movl %ecx,%esi
@@ -239,9 +2502,9 @@ sha512_block_data_order:
subl $8,%esp
leal 8(%ebp),%ebp
cmpb $148,%dl
- jne .L00300_15_x86
+ jne .L00900_15_x86
.align 16
-.L00416_79_x86:
+.L01016_79_x86:
movl 312(%esp),%ecx
movl 316(%esp),%edx
movl %ecx,%esi
@@ -414,7 +2677,7 @@ sha512_block_data_order:
subl $8,%esp
leal 8(%ebp),%ebp
cmpb $23,%dl
- jne .L00416_79_x86
+ jne .L01016_79_x86
movl 840(%esp),%esi
movl 844(%esp),%edi
movl (%esi),%eax